[SelectionDAG] Reverse the order of operands in the ISD::ADD created by TargetLowering::getVectorElementPointer so that the FrameIndex is on the left.
This seems to improve X86's ability to match this into an address computation. Otherwise the other operand gets assigned to the base register and the stack pointer + frame index ends up in the index register. But index registers can't encode ESP/RSP so we end up having to move it into another register to meet the constraint. I could try to improve the address matcher in X86, but swapping the producer seemed easier. Several other places already have the operands in this order so this is at least consistent. llvm-svn: 321370
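The practical effect is easiest to see in the updated x86 tests below. With the FrameIndex as the right-hand ADD operand, the frame address was first materialized into a scratch register and then had to occupy the base slot of the load, costing an extra instruction; with the FrameIndex on the left, the whole computation folds into one addressing mode. A minimal before/after sketch, taken from the v16i8 variable-extract test (the -24 displacement is just the offset the frame layout happens to assign in that test):

    # before: stack address built with lea, reached through %rax
    leaq -24(%rsp), %rax
    movb (%rdi,%rax), %al
    # after: folds as base = %rsp, index = %rdi, disp = -24
    movb -24(%rsp,%rdi), %al

Since ESP/RSP can be encoded as a base register but never as an index, putting the frame operand first steers the address matcher toward the folded form.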
commit e32e202b52 (parent acd88472c6)

@@ -3812,7 +3812,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
 
   Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                       DAG.getConstant(EltSize, dl, IdxVT));
-  return DAG.getNode(ISD::ADD, dl, IdxVT, Index, VecPtr);
+  return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index);
 }
 
 //===----------------------------------------------------------------------===//

@@ -15,5 +15,5 @@ define i1 @via_stack_bug(i8 signext %idx) {
 ; ALL-DAG: sh [[ONE]], 6($sp)
 ; ALL-DAG: andi [[MASKED_IDX:\$[0-9]+]], $4, 1
 ; ALL-DAG: addiu [[VPTR:\$[0-9]+]], $sp, 6
-; ALL-DAG: or [[EPTR:\$[0-9]+]], [[MASKED_IDX]], [[VPTR]]
+; ALL-DAG: or [[EPTR:\$[0-9]+]], [[VPTR]], [[MASKED_IDX]]
 ; ALL: lbu $2, 0([[EPTR]])

@@ -25,7 +25,7 @@ entry:
 ; CHECK: extsw 3, [[RSHREG]]
 ; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 2, 28, 29
 ; CHECK-P7-DAG: stxvw4x 34,
-; CHECK-P7: lwax 3, [[ELEMOFFREG]],
+; CHECK-P7: lwax 3, 3, [[ELEMOFFREG]]
 ; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 2
 ; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 2
 ; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]]

@@ -54,7 +54,7 @@ entry:
 ; CHECK: mfvsrd 3,
 ; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 3, 28, 28
 ; CHECK-P7-DAG: stxvd2x 34,
-; CHECK-P7: ldx 3, [[ELEMOFFREG]],
+; CHECK-P7: ldx 3, 3, [[ELEMOFFREG]]
 ; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 1
 ; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3
 ; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]]

@@ -77,7 +77,7 @@ entry:
 ; CHECK: xscvspdpn 1,
 ; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 2, 28, 29
 ; CHECK-P7-DAG: stxvw4x 34,
-; CHECK-P7: lfsx 1, [[ELEMOFFREG]],
+; CHECK-P7: lfsx 1, 3, [[ELEMOFFREG]]
 ; CHECK-BE: sldi [[ELNOREG:[0-9]+]], 5, 2
 ; CHECK-BE: lvsl [[SHMSKREG:[0-9]+]], 0, [[ELNOREG]]
 ; CHECK-BE: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]

@@ -1431,8 +1431,7 @@ define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) {
 ; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
 ; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT: andl $15, %edi
-; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movb (%rdi,%rax), %al
+; CHECK-NEXT: movb -24(%rsp,%rdi), %al
 ; CHECK-NEXT: retq
   %t2 = extractelement <16 x i8> %t1, i32 %index
   ret i8 %t2

@@ -1451,8 +1450,7 @@ define i8 @test_extractelement_variable_v32i8(<32 x i8> %t1, i32 %index) {
 ; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
 ; CHECK-NEXT: vmovaps %ymm0, (%rsp)
 ; CHECK-NEXT: andl $31, %edi
-; CHECK-NEXT: movq %rsp, %rax
-; CHECK-NEXT: movb (%rdi,%rax), %al
+; CHECK-NEXT: movb (%rsp,%rdi), %al
 ; CHECK-NEXT: movq %rbp, %rsp
 ; CHECK-NEXT: popq %rbp
 ; CHECK-NEXT: vzeroupper

@@ -1476,8 +1474,7 @@ define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) {
 ; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
 ; KNL-NEXT: vmovaps %ymm0, (%rsp)
 ; KNL-NEXT: andl $63, %edi
-; KNL-NEXT: movq %rsp, %rax
-; KNL-NEXT: movb (%rdi,%rax), %al
+; KNL-NEXT: movb (%rsp,%rdi), %al
 ; KNL-NEXT: movq %rbp, %rsp
 ; KNL-NEXT: popq %rbp
 ; KNL-NEXT: vzeroupper

@@ -1495,8 +1492,7 @@ define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) {
 ; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
 ; SKX-NEXT: vmovaps %zmm0, (%rsp)
 ; SKX-NEXT: andl $63, %edi
-; SKX-NEXT: movq %rsp, %rax
-; SKX-NEXT: movb (%rdi,%rax), %al
+; SKX-NEXT: movb (%rsp,%rdi), %al
 ; SKX-NEXT: movq %rbp, %rsp
 ; SKX-NEXT: popq %rbp
 ; SKX-NEXT: vzeroupper

@@ -1521,8 +1517,7 @@ define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index)
 ; KNL-NEXT: vmovaps %ymm0, (%rsp)
 ; KNL-NEXT: movzbl %dil, %eax
 ; KNL-NEXT: andl $63, %eax
-; KNL-NEXT: movq %rsp, %rcx
-; KNL-NEXT: movb (%rax,%rcx), %al
+; KNL-NEXT: movb (%rsp,%rax), %al
 ; KNL-NEXT: movq %rbp, %rsp
 ; KNL-NEXT: popq %rbp
 ; KNL-NEXT: vzeroupper

@@ -1541,8 +1536,7 @@ define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index)
 ; SKX-NEXT: vmovaps %zmm0, (%rsp)
 ; SKX-NEXT: movzbl %dil, %eax
 ; SKX-NEXT: andl $63, %eax
-; SKX-NEXT: movq %rsp, %rcx
-; SKX-NEXT: movb (%rax,%rcx), %al
+; SKX-NEXT: movb (%rsp,%rax), %al
 ; SKX-NEXT: movq %rbp, %rsp
 ; SKX-NEXT: popq %rbp
 ; SKX-NEXT: vzeroupper

@@ -1655,8 +1649,7 @@ define zeroext i8 @test_extractelement_varible_v16i1(<16 x i32> %a, <16 x i32> %
 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; KNL-NEXT: andl $15, %edi
-; KNL-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
-; KNL-NEXT: movzbl (%rdi,%rax), %eax
+; KNL-NEXT: movzbl -24(%rsp,%rdi), %eax
 ; KNL-NEXT: andl $1, %eax
 ; KNL-NEXT: vzeroupper
 ; KNL-NEXT: retq

@@ -1668,8 +1661,7 @@ define zeroext i8 @test_extractelement_varible_v16i1(<16 x i32> %a, <16 x i32> %
 ; SKX-NEXT: vpmovm2b %k0, %xmm0
 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; SKX-NEXT: andl $15, %edi
-; SKX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
-; SKX-NEXT: movzbl (%rdi,%rax), %eax
+; SKX-NEXT: movzbl -24(%rsp,%rdi), %eax
 ; SKX-NEXT: andl $1, %eax
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq

@@ -1696,8 +1688,7 @@ define zeroext i8 @test_extractelement_varible_v32i1(<32 x i8> %a, <32 x i8> %b,
 ; KNL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
 ; KNL-NEXT: vmovdqa %ymm0, (%rsp)
 ; KNL-NEXT: andl $31, %edi
-; KNL-NEXT: movq %rsp, %rax
-; KNL-NEXT: movzbl (%rdi,%rax), %eax
+; KNL-NEXT: movzbl (%rsp,%rdi), %eax
 ; KNL-NEXT: andl $1, %eax
 ; KNL-NEXT: movq %rbp, %rsp
 ; KNL-NEXT: popq %rbp

@@ -1718,8 +1709,7 @@ define zeroext i8 @test_extractelement_varible_v32i1(<32 x i8> %a, <32 x i8> %b,
 ; SKX-NEXT: vpmovm2b %k0, %ymm0
 ; SKX-NEXT: vmovdqa %ymm0, (%rsp)
 ; SKX-NEXT: andl $31, %edi
-; SKX-NEXT: movq %rsp, %rax
-; SKX-NEXT: movzbl (%rdi,%rax), %eax
+; SKX-NEXT: movzbl (%rsp,%rdi), %eax
 ; SKX-NEXT: andl $1, %eax
 ; SKX-NEXT: movq %rbp, %rsp
 ; SKX-NEXT: popq %rbp

@@ -1760,8 +1750,7 @@ define i32 @test_insertelement_variable_v32i1(<32 x i8> %a, i8 %b, i32 %index) {
 ; KNL-NEXT: andl $31, %esi
 ; KNL-NEXT: testb %dil, %dil
 ; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
-; KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; KNL-NEXT: setne (%rsi,%rax)
+; KNL-NEXT: setne 32(%rsp,%rsi)
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1

@@ -1794,8 +1783,7 @@ define i32 @test_insertelement_variable_v32i1(<32 x i8> %a, i8 %b, i32 %index) {
 ; SKX-NEXT: testb %dil, %dil
 ; SKX-NEXT: vpmovm2b %k0, %ymm0
 ; SKX-NEXT: vmovdqa %ymm0, (%rsp)
-; SKX-NEXT: movq %rsp, %rax
-; SKX-NEXT: setne (%rsi,%rax)
+; SKX-NEXT: setne (%rsp,%rsi)
 ; SKX-NEXT: vpsllw $7, (%rsp), %ymm0
 ; SKX-NEXT: vpmovb2m %ymm0, %k0
 ; SKX-NEXT: kmovd %k0, %eax

@@ -1830,8 +1818,7 @@ define i64 @test_insertelement_variable_v64i1(<64 x i8> %a, i8 %b, i32 %index) {
 ; KNL-NEXT: testb %dil, %dil
 ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
 ; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
-; KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; KNL-NEXT: setne (%rsi,%rax)
+; KNL-NEXT: setne 64(%rsp,%rsi)
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2

@@ -1877,8 +1864,7 @@ define i64 @test_insertelement_variable_v64i1(<64 x i8> %a, i8 %b, i32 %index) {
 ; SKX-NEXT: testb %dil, %dil
 ; SKX-NEXT: vpmovm2b %k0, %zmm0
 ; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
-; SKX-NEXT: movq %rsp, %rax
-; SKX-NEXT: setne (%rsi,%rax)
+; SKX-NEXT: setne (%rsp,%rsi)
 ; SKX-NEXT: vpsllw $7, (%rsp), %zmm0
 ; SKX-NEXT: vpmovb2m %zmm0, %k0
 ; SKX-NEXT: kmovq %k0, %rax

@@ -2017,8 +2003,7 @@ define i96 @test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) {
 ; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
 ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
 ; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp)
-; KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; KNL-NEXT: setne (%rax,%rcx)
+; KNL-NEXT: setne 128(%rsp,%rax)
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm2
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3

@@ -2191,8 +2176,7 @@ define i96 @test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) {
 ; SKX-NEXT: vmovdqa32 %zmm0, {{[0-9]+}}(%rsp)
 ; SKX-NEXT: vpmovm2b %k0, %zmm0
 ; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
-; SKX-NEXT: movq %rsp, %rcx
-; SKX-NEXT: setne (%rax,%rcx)
+; SKX-NEXT: setne (%rsp,%rax)
 ; SKX-NEXT: vpsllw $7, {{[0-9]+}}(%rsp), %zmm0
 ; SKX-NEXT: vpmovb2m %zmm0, %k0
 ; SKX-NEXT: vpsllw $7, (%rsp), %zmm0

@@ -2236,8 +2220,7 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
 ; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp)
 ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
 ; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
-; KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; KNL-NEXT: setne (%rsi,%rax)
+; KNL-NEXT: setne 128(%rsp,%rsi)
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm2
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3

@@ -2310,8 +2293,7 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
 ; SKX-NEXT: vmovdqa32 %zmm0, {{[0-9]+}}(%rsp)
 ; SKX-NEXT: vpmovm2b %k0, %zmm0
 ; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
-; SKX-NEXT: movq %rsp, %rax
-; SKX-NEXT: setne (%rsi,%rax)
+; SKX-NEXT: setne (%rsp,%rsi)
 ; SKX-NEXT: vpsllw $7, {{[0-9]+}}(%rsp), %zmm0
 ; SKX-NEXT: vpmovb2m %zmm0, %k0
 ; SKX-NEXT: vpsllw $7, (%rsp), %zmm0

@@ -18,8 +18,7 @@ define zeroext i8 @test_extractelement_varible_v64i1(<64 x i8> %a, <64 x i8> %b,
 ; SKX-NEXT: vpmovm2b %k0, %zmm0
 ; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
 ; SKX-NEXT: andl $63, %edi
-; SKX-NEXT: movq %rsp, %rax
-; SKX-NEXT: movzbl (%rdi,%rax), %eax
+; SKX-NEXT: movzbl (%rsp,%rdi), %eax
 ; SKX-NEXT: andl $1, %eax
 ; SKX-NEXT: movq %rbp, %rsp
 ; SKX-NEXT: popq %rbp

@@ -403,16 +403,14 @@ define i8 @extractelement_v16i8_var(<16 x i8> %a, i256 %i) nounwind {
 ; SSE: # %bb.0:
 ; SSE-NEXT: andl $15, %edi
 ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
-; SSE-NEXT: movb (%rdi,%rax), %al
+; SSE-NEXT: movb -24(%rsp,%rdi), %al
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: extractelement_v16i8_var:
 ; AVX: # %bb.0:
 ; AVX-NEXT: andl $15, %edi
 ; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
-; AVX-NEXT: movb (%rdi,%rax), %al
+; AVX-NEXT: movb -24(%rsp,%rdi), %al
 ; AVX-NEXT: retq
   %b = extractelement <16 x i8> %a, i256 %i
   ret i8 %b

@@ -428,8 +426,7 @@ define i8 @extractelement_v32i8_var(<32 x i8> %a, i256 %i) nounwind {
 ; SSE-NEXT: andl $31, %edi
 ; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
 ; SSE-NEXT: movaps %xmm0, (%rsp)
-; SSE-NEXT: movq %rsp, %rax
-; SSE-NEXT: movb (%rdi,%rax), %al
+; SSE-NEXT: movb (%rsp,%rdi), %al
 ; SSE-NEXT: movq %rbp, %rsp
 ; SSE-NEXT: popq %rbp
 ; SSE-NEXT: retq

@@ -442,8 +439,7 @@ define i8 @extractelement_v32i8_var(<32 x i8> %a, i256 %i) nounwind {
 ; AVX-NEXT: subq $64, %rsp
 ; AVX-NEXT: andl $31, %edi
 ; AVX-NEXT: vmovaps %ymm0, (%rsp)
-; AVX-NEXT: movq %rsp, %rax
-; AVX-NEXT: movb (%rdi,%rax), %al
+; AVX-NEXT: movb (%rsp,%rdi), %al
 ; AVX-NEXT: movq %rbp, %rsp
 ; AVX-NEXT: popq %rbp
 ; AVX-NEXT: vzeroupper

@@ -142,14 +142,13 @@ define <8 x i32> @var_shuffle_v8i32(<8 x i32> %v, <8 x i32> %indices) nounwind {
 ; AVX1-NEXT: andl $7, %r10d
 ; AVX1-NEXT: andl $28, %edi
 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-NEXT: movq %rsp, %rax
-; AVX1-NEXT: vpinsrd $1, (%rdx,%rax), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrd $1, (%rsp,%rdx), %xmm0, %xmm0
 ; AVX1-NEXT: vpinsrd $2, (%rsp,%r10,4), %xmm0, %xmm0
-; AVX1-NEXT: vpinsrd $3, (%rdi,%rax), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrd $3, (%rsp,%rdi), %xmm0, %xmm0
 ; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT: vpinsrd $1, (%rsi,%rax), %xmm1, %xmm1
+; AVX1-NEXT: vpinsrd $1, (%rsp,%rsi), %xmm1, %xmm1
 ; AVX1-NEXT: vpinsrd $2, (%rsp,%r8,4), %xmm1, %xmm1
-; AVX1-NEXT: vpinsrd $3, (%rcx,%rax), %xmm1, %xmm1
+; AVX1-NEXT: vpinsrd $3, (%rsp,%rcx), %xmm1, %xmm1
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: movq %rbp, %rsp
 ; AVX1-NEXT: popq %rbp

@@ -505,118 +504,117 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
 ; AVX1-NEXT: vpextrb $0, %xmm2, %eax
 ; AVX1-NEXT: vmovaps %ymm0, (%rsp)
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movq %rsp, %rcx
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vmovd %eax, %xmm0
 ; AVX1-NEXT: vpextrb $1, %xmm2, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $2, %xmm2, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $3, %xmm2, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $4, %xmm2, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $5, %xmm2, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $6, %xmm2, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $7, %xmm2, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $8, %xmm2, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $9, %xmm2, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $10, %xmm2, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $11, %xmm2, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $12, %xmm2, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $13, %xmm2, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $14, %xmm2, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $15, %xmm2, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $0, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vmovd %eax, %xmm2
 ; AVX1-NEXT: vpextrb $1, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: vpinsrb $1, (%rax,%rcx), %xmm2, %xmm2
+; AVX1-NEXT: vpinsrb $1, (%rsp,%rax), %xmm2, %xmm2
 ; AVX1-NEXT: vpextrb $2, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: vpinsrb $2, (%rax,%rcx), %xmm2, %xmm2
+; AVX1-NEXT: vpinsrb $2, (%rsp,%rax), %xmm2, %xmm2
 ; AVX1-NEXT: vpextrb $3, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: vpinsrb $3, (%rax,%rcx), %xmm2, %xmm2
+; AVX1-NEXT: vpinsrb $3, (%rsp,%rax), %xmm2, %xmm2
 ; AVX1-NEXT: vpextrb $4, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: vpinsrb $4, (%rax,%rcx), %xmm2, %xmm2
+; AVX1-NEXT: vpinsrb $4, (%rsp,%rax), %xmm2, %xmm2
 ; AVX1-NEXT: vpextrb $5, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: vpinsrb $5, (%rax,%rcx), %xmm2, %xmm2
+; AVX1-NEXT: vpinsrb $5, (%rsp,%rax), %xmm2, %xmm2
 ; AVX1-NEXT: vpextrb $6, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: vpinsrb $6, (%rax,%rcx), %xmm2, %xmm2
+; AVX1-NEXT: vpinsrb $6, (%rsp,%rax), %xmm2, %xmm2
 ; AVX1-NEXT: vpextrb $7, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: vpinsrb $7, (%rax,%rcx), %xmm2, %xmm2
+; AVX1-NEXT: vpinsrb $7, (%rsp,%rax), %xmm2, %xmm2
 ; AVX1-NEXT: vpextrb $8, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: vpinsrb $8, (%rax,%rcx), %xmm2, %xmm2
+; AVX1-NEXT: vpinsrb $8, (%rsp,%rax), %xmm2, %xmm2
 ; AVX1-NEXT: vpextrb $9, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: vpinsrb $9, (%rax,%rcx), %xmm2, %xmm2
+; AVX1-NEXT: vpinsrb $9, (%rsp,%rax), %xmm2, %xmm2
 ; AVX1-NEXT: vpextrb $10, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: vpinsrb $10, (%rax,%rcx), %xmm2, %xmm2
+; AVX1-NEXT: vpinsrb $10, (%rsp,%rax), %xmm2, %xmm2
 ; AVX1-NEXT: vpextrb $11, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: vpinsrb $11, (%rax,%rcx), %xmm2, %xmm2
+; AVX1-NEXT: vpinsrb $11, (%rsp,%rax), %xmm2, %xmm2
 ; AVX1-NEXT: vpextrb $12, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: vpinsrb $12, (%rax,%rcx), %xmm2, %xmm2
+; AVX1-NEXT: vpinsrb $12, (%rsp,%rax), %xmm2, %xmm2
 ; AVX1-NEXT: vpextrb $13, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: vpinsrb $13, (%rax,%rcx), %xmm2, %xmm2
+; AVX1-NEXT: vpinsrb $13, (%rsp,%rax), %xmm2, %xmm2
 ; AVX1-NEXT: vpextrb $14, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: vpinsrb $14, (%rax,%rcx), %xmm2, %xmm2
+; AVX1-NEXT: vpinsrb $14, (%rsp,%rax), %xmm2, %xmm2
 ; AVX1-NEXT: vpextrb $15, %xmm1, %eax
 ; AVX1-NEXT: andl $31, %eax
-; AVX1-NEXT: movzbl (%rax,%rcx), %eax
+; AVX1-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: movq %rbp, %rsp

@@ -633,118 +631,117 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
 ; AVX2-NEXT: vpextrb $0, %xmm2, %eax
 ; AVX2-NEXT: vmovaps %ymm0, (%rsp)
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movq %rsp, %rcx
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vmovd %eax, %xmm0
 ; AVX2-NEXT: vpextrb $1, %xmm2, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $2, %xmm2, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $3, %xmm2, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $4, %xmm2, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $5, %xmm2, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $6, %xmm2, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $7, %xmm2, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $8, %xmm2, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $9, %xmm2, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $10, %xmm2, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $11, %xmm2, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $12, %xmm2, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $13, %xmm2, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $14, %xmm2, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $15, %xmm2, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $0, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vmovd %eax, %xmm2
 ; AVX2-NEXT: vpextrb $1, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: vpinsrb $1, (%rax,%rcx), %xmm2, %xmm2
+; AVX2-NEXT: vpinsrb $1, (%rsp,%rax), %xmm2, %xmm2
 ; AVX2-NEXT: vpextrb $2, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: vpinsrb $2, (%rax,%rcx), %xmm2, %xmm2
+; AVX2-NEXT: vpinsrb $2, (%rsp,%rax), %xmm2, %xmm2
 ; AVX2-NEXT: vpextrb $3, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: vpinsrb $3, (%rax,%rcx), %xmm2, %xmm2
+; AVX2-NEXT: vpinsrb $3, (%rsp,%rax), %xmm2, %xmm2
 ; AVX2-NEXT: vpextrb $4, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: vpinsrb $4, (%rax,%rcx), %xmm2, %xmm2
+; AVX2-NEXT: vpinsrb $4, (%rsp,%rax), %xmm2, %xmm2
 ; AVX2-NEXT: vpextrb $5, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: vpinsrb $5, (%rax,%rcx), %xmm2, %xmm2
+; AVX2-NEXT: vpinsrb $5, (%rsp,%rax), %xmm2, %xmm2
 ; AVX2-NEXT: vpextrb $6, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: vpinsrb $6, (%rax,%rcx), %xmm2, %xmm2
+; AVX2-NEXT: vpinsrb $6, (%rsp,%rax), %xmm2, %xmm2
 ; AVX2-NEXT: vpextrb $7, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: vpinsrb $7, (%rax,%rcx), %xmm2, %xmm2
+; AVX2-NEXT: vpinsrb $7, (%rsp,%rax), %xmm2, %xmm2
 ; AVX2-NEXT: vpextrb $8, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: vpinsrb $8, (%rax,%rcx), %xmm2, %xmm2
+; AVX2-NEXT: vpinsrb $8, (%rsp,%rax), %xmm2, %xmm2
 ; AVX2-NEXT: vpextrb $9, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: vpinsrb $9, (%rax,%rcx), %xmm2, %xmm2
+; AVX2-NEXT: vpinsrb $9, (%rsp,%rax), %xmm2, %xmm2
 ; AVX2-NEXT: vpextrb $10, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: vpinsrb $10, (%rax,%rcx), %xmm2, %xmm2
+; AVX2-NEXT: vpinsrb $10, (%rsp,%rax), %xmm2, %xmm2
 ; AVX2-NEXT: vpextrb $11, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: vpinsrb $11, (%rax,%rcx), %xmm2, %xmm2
+; AVX2-NEXT: vpinsrb $11, (%rsp,%rax), %xmm2, %xmm2
 ; AVX2-NEXT: vpextrb $12, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: vpinsrb $12, (%rax,%rcx), %xmm2, %xmm2
+; AVX2-NEXT: vpinsrb $12, (%rsp,%rax), %xmm2, %xmm2
 ; AVX2-NEXT: vpextrb $13, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: vpinsrb $13, (%rax,%rcx), %xmm2, %xmm2
+; AVX2-NEXT: vpinsrb $13, (%rsp,%rax), %xmm2, %xmm2
 ; AVX2-NEXT: vpextrb $14, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: vpinsrb $14, (%rax,%rcx), %xmm2, %xmm2
+; AVX2-NEXT: vpinsrb $14, (%rsp,%rax), %xmm2, %xmm2
 ; AVX2-NEXT: vpextrb $15, %xmm1, %eax
 ; AVX2-NEXT: andl $31, %eax
-; AVX2-NEXT: movzbl (%rax,%rcx), %eax
+; AVX2-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX2-NEXT: movq %rbp, %rsp

@@ -761,118 +758,117 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
 ; AVX512F-NEXT: vpextrb $0, %xmm2, %eax
 ; AVX512F-NEXT: vmovaps %ymm0, (%rsp)
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movq %rsp, %rcx
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vmovd %eax, %xmm0
 ; AVX512F-NEXT: vpextrb $1, %xmm2, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vpextrb $2, %xmm2, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vpextrb $3, %xmm2, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vpextrb $4, %xmm2, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vpextrb $5, %xmm2, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vpextrb $6, %xmm2, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vpextrb $7, %xmm2, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vpextrb $8, %xmm2, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vpextrb $9, %xmm2, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vpextrb $10, %xmm2, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vpextrb $11, %xmm2, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vpextrb $12, %xmm2, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vpextrb $13, %xmm2, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vpextrb $14, %xmm2, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vpextrb $15, %xmm2, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vpextrb $0, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vmovd %eax, %xmm2
 ; AVX512F-NEXT: vpextrb $1, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: vpinsrb $1, (%rax,%rcx), %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $1, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512F-NEXT: vpextrb $2, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: vpinsrb $2, (%rax,%rcx), %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $2, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512F-NEXT: vpextrb $3, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: vpinsrb $3, (%rax,%rcx), %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $3, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512F-NEXT: vpextrb $4, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: vpinsrb $4, (%rax,%rcx), %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $4, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512F-NEXT: vpextrb $5, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: vpinsrb $5, (%rax,%rcx), %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $5, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512F-NEXT: vpextrb $6, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: vpinsrb $6, (%rax,%rcx), %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $6, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512F-NEXT: vpextrb $7, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: vpinsrb $7, (%rax,%rcx), %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $7, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512F-NEXT: vpextrb $8, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: vpinsrb $8, (%rax,%rcx), %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $8, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512F-NEXT: vpextrb $9, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: vpinsrb $9, (%rax,%rcx), %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $9, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512F-NEXT: vpextrb $10, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: vpinsrb $10, (%rax,%rcx), %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $10, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512F-NEXT: vpextrb $11, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: vpinsrb $11, (%rax,%rcx), %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $11, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512F-NEXT: vpextrb $12, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: vpinsrb $12, (%rax,%rcx), %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $12, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512F-NEXT: vpextrb $13, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: vpinsrb $13, (%rax,%rcx), %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $13, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512F-NEXT: vpextrb $14, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: vpinsrb $14, (%rax,%rcx), %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $14, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512F-NEXT: vpextrb $15, %xmm1, %eax
 ; AVX512F-NEXT: andl $31, %eax
-; AVX512F-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
 ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX512F-NEXT: movq %rbp, %rsp

@@ -889,118 +885,117 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
 ; AVX512VL-NEXT: vpextrb $0, %xmm2, %eax
 ; AVX512VL-NEXT: vmovaps %ymm0, (%rsp)
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movq %rsp, %rcx
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vmovd %eax, %xmm0
 ; AVX512VL-NEXT: vpextrb $1, %xmm2, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $2, %xmm2, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $3, %xmm2, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $4, %xmm2, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $5, %xmm2, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $6, %xmm2, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $7, %xmm2, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $8, %xmm2, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $9, %xmm2, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $10, %xmm2, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $11, %xmm2, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $12, %xmm2, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $13, %xmm2, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $14, %xmm2, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $15, %xmm2, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $0, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vmovd %eax, %xmm2
 ; AVX512VL-NEXT: vpextrb $1, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: vpinsrb $1, (%rax,%rcx), %xmm2, %xmm2
+; AVX512VL-NEXT: vpinsrb $1, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpextrb $2, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: vpinsrb $2, (%rax,%rcx), %xmm2, %xmm2
+; AVX512VL-NEXT: vpinsrb $2, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpextrb $3, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: vpinsrb $3, (%rax,%rcx), %xmm2, %xmm2
+; AVX512VL-NEXT: vpinsrb $3, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpextrb $4, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: vpinsrb $4, (%rax,%rcx), %xmm2, %xmm2
+; AVX512VL-NEXT: vpinsrb $4, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpextrb $5, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: vpinsrb $5, (%rax,%rcx), %xmm2, %xmm2
+; AVX512VL-NEXT: vpinsrb $5, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpextrb $6, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: vpinsrb $6, (%rax,%rcx), %xmm2, %xmm2
+; AVX512VL-NEXT: vpinsrb $6, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpextrb $7, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: vpinsrb $7, (%rax,%rcx), %xmm2, %xmm2
+; AVX512VL-NEXT: vpinsrb $7, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpextrb $8, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: vpinsrb $8, (%rax,%rcx), %xmm2, %xmm2
+; AVX512VL-NEXT: vpinsrb $8, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpextrb $9, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: vpinsrb $9, (%rax,%rcx), %xmm2, %xmm2
+; AVX512VL-NEXT: vpinsrb $9, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpextrb $10, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: vpinsrb $10, (%rax,%rcx), %xmm2, %xmm2
+; AVX512VL-NEXT: vpinsrb $10, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpextrb $11, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: vpinsrb $11, (%rax,%rcx), %xmm2, %xmm2
+; AVX512VL-NEXT: vpinsrb $11, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpextrb $12, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: vpinsrb $12, (%rax,%rcx), %xmm2, %xmm2
+; AVX512VL-NEXT: vpinsrb $12, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpextrb $13, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: vpinsrb $13, (%rax,%rcx), %xmm2, %xmm2
+; AVX512VL-NEXT: vpinsrb $13, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpextrb $14, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: vpinsrb $14, (%rax,%rcx), %xmm2, %xmm2
+; AVX512VL-NEXT: vpinsrb $14, (%rsp,%rax), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpextrb $15, %xmm1, %eax
 ; AVX512VL-NEXT: andl $31, %eax
-; AVX512VL-NEXT: movzbl (%rax,%rcx), %eax
+; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
 ; AVX512VL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
 ; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX512VL-NEXT: movq %rbp, %rsp

@@ -1240,7 +1235,6 @@ define <8 x float> @var_shuffle_v8f32(<8 x float> %v, <8 x i32> %indices) nounwi
 ; AVX1-NEXT: andl $7, %r10d
 ; AVX1-NEXT: andl $28, %edi
 ; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-NEXT: movq %rsp, %rax
 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]

@@ -511,265 +511,201 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
 ; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
 ; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
 ; NOBW-NEXT: vmovaps %ymm0, (%rsp)
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: movzbl (%rax,%rcx), %eax
-; NOBW-NEXT: vpextrb $1, %xmm4, %ecx
-; NOBW-NEXT: andl $63, %ecx
+; NOBW-NEXT: movzbl 3008(%rsp,%rax), %eax
 ; NOBW-NEXT: vmovd %eax, %xmm0
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; NOBW-NEXT: vpinsrb $1, (%rcx,%rax), %xmm0, %xmm0
+; NOBW-NEXT: vpextrb $1, %xmm4, %eax
+; NOBW-NEXT: andl $63, %eax
+; NOBW-NEXT: vpinsrb $1, 2944(%rsp,%rax), %xmm0, %xmm0
 ; NOBW-NEXT: vpextrb $2, %xmm4, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $2, (%rax,%rcx), %xmm0, %xmm0
+; NOBW-NEXT: vpinsrb $2, 2880(%rsp,%rax), %xmm0, %xmm0
 ; NOBW-NEXT: vpextrb $3, %xmm4, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $3, (%rax,%rcx), %xmm0, %xmm0
+; NOBW-NEXT: vpinsrb $3, 2816(%rsp,%rax), %xmm0, %xmm0
 ; NOBW-NEXT: vpextrb $4, %xmm4, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $4, (%rax,%rcx), %xmm0, %xmm0
+; NOBW-NEXT: vpinsrb $4, 2752(%rsp,%rax), %xmm0, %xmm0
 ; NOBW-NEXT: vpextrb $5, %xmm4, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $5, (%rax,%rcx), %xmm0, %xmm0
+; NOBW-NEXT: vpinsrb $5, 2688(%rsp,%rax), %xmm0, %xmm0
 ; NOBW-NEXT: vpextrb $6, %xmm4, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $6, (%rax,%rcx), %xmm0, %xmm0
+; NOBW-NEXT: vpinsrb $6, 2624(%rsp,%rax), %xmm0, %xmm0
 ; NOBW-NEXT: vpextrb $7, %xmm4, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $7, (%rax,%rcx), %xmm0, %xmm0
+; NOBW-NEXT: vpinsrb $7, 2560(%rsp,%rax), %xmm0, %xmm0
 ; NOBW-NEXT: vpextrb $8, %xmm4, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $8, (%rax,%rcx), %xmm0, %xmm0
+; NOBW-NEXT: vpinsrb $8, 2496(%rsp,%rax), %xmm0, %xmm0
 ; NOBW-NEXT: vpextrb $9, %xmm4, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $9, (%rax,%rcx), %xmm0, %xmm0
+; NOBW-NEXT: vpinsrb $9, 2432(%rsp,%rax), %xmm0, %xmm0
 ; NOBW-NEXT: vpextrb $10, %xmm4, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $10, (%rax,%rcx), %xmm0, %xmm0
+; NOBW-NEXT: vpinsrb $10, 2368(%rsp,%rax), %xmm0, %xmm0
 ; NOBW-NEXT: vpextrb $11, %xmm4, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $11, (%rax,%rcx), %xmm0, %xmm0
+; NOBW-NEXT: vpinsrb $11, 2304(%rsp,%rax), %xmm0, %xmm0
 ; NOBW-NEXT: vpextrb $12, %xmm4, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $12, (%rax,%rcx), %xmm0, %xmm0
+; NOBW-NEXT: vpinsrb $12, 2240(%rsp,%rax), %xmm0, %xmm0
 ; NOBW-NEXT: vpextrb $13, %xmm4, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $13, (%rax,%rcx), %xmm0, %xmm0
+; NOBW-NEXT: vpinsrb $13, 2176(%rsp,%rax), %xmm0, %xmm0
 ; NOBW-NEXT: vpextrb $14, %xmm4, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $14, (%rax,%rcx), %xmm0, %xmm0
+; NOBW-NEXT: vpinsrb $14, 2112(%rsp,%rax), %xmm0, %xmm0
 ; NOBW-NEXT: vpextrb $15, %xmm4, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $15, (%rax,%rcx), %xmm0, %xmm0
+; NOBW-NEXT: vpinsrb $15, 2048(%rsp,%rax), %xmm0, %xmm0
 ; NOBW-NEXT: vpextrb $0, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: movzbl (%rax,%rcx), %eax
-; NOBW-NEXT: vpextrb $1, %xmm2, %ecx
-; NOBW-NEXT: andl $63, %ecx
+; NOBW-NEXT: movzbl 4032(%rsp,%rax), %eax
 ; NOBW-NEXT: vmovd %eax, %xmm1
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; NOBW-NEXT: vpinsrb $1, (%rcx,%rax), %xmm1, %xmm1
+; NOBW-NEXT: vpextrb $1, %xmm2, %eax
+; NOBW-NEXT: andl $63, %eax
+; NOBW-NEXT: vpinsrb $1, 3968(%rsp,%rax), %xmm1, %xmm1
 ; NOBW-NEXT: vpextrb $2, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $2, (%rax,%rcx), %xmm1, %xmm1
+; NOBW-NEXT: vpinsrb $2, 3904(%rsp,%rax), %xmm1, %xmm1
 ; NOBW-NEXT: vpextrb $3, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $3, (%rax,%rcx), %xmm1, %xmm1
+; NOBW-NEXT: vpinsrb $3, 3840(%rsp,%rax), %xmm1, %xmm1
 ; NOBW-NEXT: vpextrb $4, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $4, (%rax,%rcx), %xmm1, %xmm1
+; NOBW-NEXT: vpinsrb $4, 3776(%rsp,%rax), %xmm1, %xmm1
 ; NOBW-NEXT: vpextrb $5, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $5, (%rax,%rcx), %xmm1, %xmm1
+; NOBW-NEXT: vpinsrb $5, 3712(%rsp,%rax), %xmm1, %xmm1
 ; NOBW-NEXT: vpextrb $6, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $6, (%rax,%rcx), %xmm1, %xmm1
+; NOBW-NEXT: vpinsrb $6, 3648(%rsp,%rax), %xmm1, %xmm1
 ; NOBW-NEXT: vpextrb $7, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $7, (%rax,%rcx), %xmm1, %xmm1
+; NOBW-NEXT: vpinsrb $7, 3584(%rsp,%rax), %xmm1, %xmm1
 ; NOBW-NEXT: vpextrb $8, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $8, (%rax,%rcx), %xmm1, %xmm1
+; NOBW-NEXT: vpinsrb $8, 3520(%rsp,%rax), %xmm1, %xmm1
 ; NOBW-NEXT: vpextrb $9, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $9, (%rax,%rcx), %xmm1, %xmm1
+; NOBW-NEXT: vpinsrb $9, 3456(%rsp,%rax), %xmm1, %xmm1
 ; NOBW-NEXT: vpextrb $10, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $10, (%rax,%rcx), %xmm1, %xmm1
+; NOBW-NEXT: vpinsrb $10, 3392(%rsp,%rax), %xmm1, %xmm1
 ; NOBW-NEXT: vpextrb $11, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $11, (%rax,%rcx), %xmm1, %xmm1
+; NOBW-NEXT: vpinsrb $11, 3328(%rsp,%rax), %xmm1, %xmm1
 ; NOBW-NEXT: vpextrb $12, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $12, (%rax,%rcx), %xmm1, %xmm1
+; NOBW-NEXT: vpinsrb $12, 3264(%rsp,%rax), %xmm1, %xmm1
 ; NOBW-NEXT: vpextrb $13, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $13, (%rax,%rcx), %xmm1, %xmm1
+; NOBW-NEXT: vpinsrb $13, 3200(%rsp,%rax), %xmm1, %xmm1
 ; NOBW-NEXT: vpextrb $14, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $14, (%rax,%rcx), %xmm1, %xmm1
+; NOBW-NEXT: vpinsrb $14, 3136(%rsp,%rax), %xmm1, %xmm1
 ; NOBW-NEXT: vpextrb $15, %xmm2, %eax
 ; NOBW-NEXT: vextracti128 $1, %ymm3, %xmm2
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $15, (%rax,%rcx), %xmm1, %xmm1
+; NOBW-NEXT: vpinsrb $15, 3072(%rsp,%rax), %xmm1, %xmm1
 ; NOBW-NEXT: vpextrb $0, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: movzbl (%rax,%rcx), %eax
-; NOBW-NEXT: vpextrb $1, %xmm2, %ecx
-; NOBW-NEXT: andl $63, %ecx
+; NOBW-NEXT: movzbl 960(%rsp,%rax), %eax
 ; NOBW-NEXT: vmovd %eax, %xmm4
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; NOBW-NEXT: vpinsrb $1, (%rcx,%rax), %xmm4, %xmm4
+; NOBW-NEXT: vpextrb $1, %xmm2, %eax
+; NOBW-NEXT: andl $63, %eax
+; NOBW-NEXT: vpinsrb $1, 896(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $2, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $2, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $2, 832(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $3, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $3, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $3, 768(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $4, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $4, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $4, 704(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $5, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $5, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $5, 640(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $6, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $6, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $6, 576(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $7, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $7, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $7, 512(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $8, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $8, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $8, 448(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $9, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $9, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $9, 384(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $10, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $10, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $10, 320(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $11, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $11, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $11, 256(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $12, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $12, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $12, 192(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $13, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $13, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $13, 128(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $14, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $14, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $14, 64(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $15, %xmm2, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: movq %rsp, %rcx
-; NOBW-NEXT: vpinsrb $15, (%rax,%rcx), %xmm4, %xmm2
+; NOBW-NEXT: vpinsrb $15, (%rsp,%rax), %xmm4, %xmm2
 ; NOBW-NEXT: vpextrb $0, %xmm3, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: movzbl (%rax,%rcx), %eax
-; NOBW-NEXT: vpextrb $1, %xmm3, %ecx
-; NOBW-NEXT: andl $63, %ecx
+; NOBW-NEXT: movzbl 1984(%rsp,%rax), %eax
 ; NOBW-NEXT: vmovd %eax, %xmm4
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; NOBW-NEXT: vpinsrb $1, (%rcx,%rax), %xmm4, %xmm4
+; NOBW-NEXT: vpextrb $1, %xmm3, %eax
+; NOBW-NEXT: andl $63, %eax
+; NOBW-NEXT: vpinsrb $1, 1920(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $2, %xmm3, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $2, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $2, 1856(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $3, %xmm3, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $3, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $3, 1792(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $4, %xmm3, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $4, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $4, 1728(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $5, %xmm3, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $5, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $5, 1664(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $6, %xmm3, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $6, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $6, 1600(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $7, %xmm3, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $7, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $7, 1536(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $8, %xmm3, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $8, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $8, 1472(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $9, %xmm3, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $9, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $9, 1408(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $10, %xmm3, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $10, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $10, 1344(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $11, %xmm3, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $11, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $11, 1280(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $12, %xmm3, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $12, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $12, 1216(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $13, %xmm3, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $13, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $13, 1152(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $14, %xmm3, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $14, (%rax,%rcx), %xmm4, %xmm4
+; NOBW-NEXT: vpinsrb $14, 1088(%rsp,%rax), %xmm4, %xmm4
 ; NOBW-NEXT: vpextrb $15, %xmm3, %eax
 ; NOBW-NEXT: andl $63, %eax
-; NOBW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; NOBW-NEXT: vpinsrb $15, (%rax,%rcx), %xmm4, %xmm3
+; NOBW-NEXT: vpinsrb $15, 1024(%rsp,%rax), %xmm4, %xmm3
 ; NOBW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; NOBW-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm1
|
||||
; NOBW-NEXT: movq %rbp, %rsp
|
||||
|
@ -414,63 +414,62 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: leaq -{{[0-9]+}}(%rsp), %r10
; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movzbl -24(%rsp,%rax), %eax
; SSE2-NEXT: movd %eax, %xmm8
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movzbl -24(%rsp,%rax), %eax
; SSE2-NEXT: movd %eax, %xmm15
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movzbl -24(%rsp,%rax), %eax
; SSE2-NEXT: movd %eax, %xmm9
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movzbl -24(%rsp,%rax), %eax
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movzbl -24(%rsp,%rax), %eax
; SSE2-NEXT: movd %eax, %xmm10
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movzbl -24(%rsp,%rax), %eax
; SSE2-NEXT: movd %eax, %xmm7
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movzbl -24(%rsp,%rax), %eax
; SSE2-NEXT: movd %eax, %xmm11
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movzbl -24(%rsp,%rax), %eax
; SSE2-NEXT: movd %eax, %xmm6
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm12
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm5
; SSE2-NEXT: andl $15, %r9d
; SSE2-NEXT: movzbl (%r9,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm13
; SSE2-NEXT: andl $15, %r8d
; SSE2-NEXT: movzbl (%r8,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm4
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl (%rcx,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm14
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %eax
; SSE2-NEXT: movd %eax, %xmm12
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movzbl -24(%rsp,%rdx), %eax
; SSE2-NEXT: movd %eax, %xmm5
; SSE2-NEXT: andl $15, %esi
; SSE2-NEXT: movzbl (%rsi,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: movzbl -24(%rsp,%rsi), %eax
; SSE2-NEXT: movd %eax, %xmm13
; SSE2-NEXT: andl $15, %edi
; SSE2-NEXT: movzbl (%rdi,%r10), %eax
; SSE2-NEXT: movzbl -24(%rsp,%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: andl $15, %r9d
; SSE2-NEXT: movzbl -24(%rsp,%r9), %eax
; SSE2-NEXT: movd %eax, %xmm14
; SSE2-NEXT: andl $15, %r8d
; SSE2-NEXT: movzbl -24(%rsp,%r8), %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl -24(%rsp,%rax), %eax
; SSE2-NEXT: movd %eax, %xmm4
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl -24(%rsp,%rax), %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
@ -479,12 +478,12 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm13[0],xmm0[1],xmm13[1],xmm0[2],xmm13[2],xmm0[3],xmm13[3],xmm0[4],xmm13[4],xmm0[5],xmm13[5],xmm0[6],xmm13[6],xmm0[7],xmm13[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
; SSE2-NEXT: retq
;
@ -499,63 +498,62 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: leaq -{{[0-9]+}}(%rsp), %r10
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movzbl -24(%rsp,%rax), %eax
; SSSE3-NEXT: movd %eax, %xmm8
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movzbl -24(%rsp,%rax), %eax
; SSSE3-NEXT: movd %eax, %xmm15
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movzbl -24(%rsp,%rax), %eax
; SSSE3-NEXT: movd %eax, %xmm9
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movzbl -24(%rsp,%rax), %eax
; SSSE3-NEXT: movd %eax, %xmm3
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movzbl -24(%rsp,%rax), %eax
; SSSE3-NEXT: movd %eax, %xmm10
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movzbl -24(%rsp,%rax), %eax
; SSSE3-NEXT: movd %eax, %xmm7
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movzbl -24(%rsp,%rax), %eax
; SSSE3-NEXT: movd %eax, %xmm11
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movzbl -24(%rsp,%rax), %eax
; SSSE3-NEXT: movd %eax, %xmm6
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm12
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm5
; SSSE3-NEXT: andl $15, %r9d
; SSSE3-NEXT: movzbl (%r9,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm13
; SSSE3-NEXT: andl $15, %r8d
; SSSE3-NEXT: movzbl (%r8,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm4
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl (%rcx,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm14
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %eax
; SSSE3-NEXT: movd %eax, %xmm12
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: movzbl -24(%rsp,%rdx), %eax
; SSSE3-NEXT: movd %eax, %xmm5
; SSSE3-NEXT: andl $15, %esi
; SSSE3-NEXT: movzbl (%rsi,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm2
; SSSE3-NEXT: movzbl -24(%rsp,%rsi), %eax
; SSSE3-NEXT: movd %eax, %xmm13
; SSSE3-NEXT: andl $15, %edi
; SSSE3-NEXT: movzbl (%rdi,%r10), %eax
; SSSE3-NEXT: movzbl -24(%rsp,%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: andl $15, %r9d
; SSSE3-NEXT: movzbl -24(%rsp,%r9), %eax
; SSSE3-NEXT: movd %eax, %xmm14
; SSSE3-NEXT: andl $15, %r8d
; SSSE3-NEXT: movzbl -24(%rsp,%r8), %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl -24(%rsp,%rax), %eax
; SSSE3-NEXT: movd %eax, %xmm4
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl -24(%rsp,%rax), %eax
; SSSE3-NEXT: movd %eax, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
@ -564,12 +562,12 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm13[0],xmm0[1],xmm13[1],xmm0[2],xmm13[2],xmm0[3],xmm13[3],xmm0[4],xmm13[4],xmm0[5],xmm13[5],xmm0[6],xmm13[6],xmm0[7],xmm13[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
; SSSE3-NEXT: retq
;
@ -583,49 +581,48 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
; SSE41-NEXT: # kill: def %edi killed %edi def %rdi
; SSE41-NEXT: andl $15, %edi
; SSE41-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE41-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; SSE41-NEXT: movzbl (%rdi,%rax), %edi
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: movzbl -24(%rsp,%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: andl $15, %esi
; SSE41-NEXT: pinsrb $1, (%rsi,%rax), %xmm0
; SSE41-NEXT: pinsrb $1, -24(%rsp,%rsi), %xmm0
; SSE41-NEXT: andl $15, %edx
; SSE41-NEXT: pinsrb $2, (%rdx,%rax), %xmm0
; SSE41-NEXT: pinsrb $2, -24(%rsp,%rdx), %xmm0
; SSE41-NEXT: andl $15, %ecx
; SSE41-NEXT: pinsrb $3, (%rcx,%rax), %xmm0
; SSE41-NEXT: pinsrb $3, -24(%rsp,%rcx), %xmm0
; SSE41-NEXT: andl $15, %r8d
; SSE41-NEXT: pinsrb $4, (%r8,%rax), %xmm0
; SSE41-NEXT: pinsrb $4, -24(%rsp,%r8), %xmm0
; SSE41-NEXT: andl $15, %r9d
; SSE41-NEXT: pinsrb $5, (%r9,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; SSE41-NEXT: andl $15, %ecx
; SSE41-NEXT: pinsrb $6, (%rcx,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; SSE41-NEXT: andl $15, %ecx
; SSE41-NEXT: pinsrb $7, (%rcx,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; SSE41-NEXT: andl $15, %ecx
; SSE41-NEXT: pinsrb $8, (%rcx,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; SSE41-NEXT: andl $15, %ecx
; SSE41-NEXT: pinsrb $9, (%rcx,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; SSE41-NEXT: andl $15, %ecx
; SSE41-NEXT: pinsrb $10, (%rcx,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; SSE41-NEXT: andl $15, %ecx
; SSE41-NEXT: pinsrb $11, (%rcx,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; SSE41-NEXT: andl $15, %ecx
; SSE41-NEXT: pinsrb $12, (%rcx,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; SSE41-NEXT: andl $15, %ecx
; SSE41-NEXT: pinsrb $13, (%rcx,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; SSE41-NEXT: andl $15, %ecx
; SSE41-NEXT: pinsrb $14, (%rcx,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; SSE41-NEXT: andl $15, %ecx
; SSE41-NEXT: pinsrb $15, (%rcx,%rax), %xmm0
; SSE41-NEXT: pinsrb $5, -24(%rsp,%r9), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $6, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $7, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $8, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $9, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $10, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $11, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $12, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $13, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $14, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $15, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
@ -638,49 +635,48 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
; AVX-NEXT: # kill: def %edi killed %edi def %rdi
; AVX-NEXT: andl $15, %edi
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; AVX-NEXT: movzbl (%rdi,%rax), %edi
; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: movzbl -24(%rsp,%rdi), %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: andl $15, %esi
; AVX-NEXT: vpinsrb $1, (%rsi,%rax), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $1, -24(%rsp,%rsi), %xmm0, %xmm0
; AVX-NEXT: andl $15, %edx
; AVX-NEXT: vpinsrb $2, (%rdx,%rax), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $2, -24(%rsp,%rdx), %xmm0, %xmm0
; AVX-NEXT: andl $15, %ecx
; AVX-NEXT: vpinsrb $3, (%rcx,%rax), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $3, -24(%rsp,%rcx), %xmm0, %xmm0
; AVX-NEXT: andl $15, %r8d
; AVX-NEXT: vpinsrb $4, (%r8,%rax), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $4, -24(%rsp,%r8), %xmm0, %xmm0
; AVX-NEXT: andl $15, %r9d
; AVX-NEXT: vpinsrb $5, (%r9,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX-NEXT: andl $15, %ecx
; AVX-NEXT: vpinsrb $6, (%rcx,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX-NEXT: andl $15, %ecx
; AVX-NEXT: vpinsrb $7, (%rcx,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX-NEXT: andl $15, %ecx
; AVX-NEXT: vpinsrb $8, (%rcx,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX-NEXT: andl $15, %ecx
; AVX-NEXT: vpinsrb $9, (%rcx,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX-NEXT: andl $15, %ecx
; AVX-NEXT: vpinsrb $10, (%rcx,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX-NEXT: andl $15, %ecx
; AVX-NEXT: vpinsrb $11, (%rcx,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX-NEXT: andl $15, %ecx
; AVX-NEXT: vpinsrb $12, (%rcx,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX-NEXT: andl $15, %ecx
; AVX-NEXT: vpinsrb $13, (%rcx,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX-NEXT: andl $15, %ecx
; AVX-NEXT: vpinsrb $14, (%rcx,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX-NEXT: andl $15, %ecx
; AVX-NEXT: vpinsrb $15, (%rcx,%rax), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $5, -24(%rsp,%r9), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $6, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $7, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $8, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $9, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $10, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $11, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $12, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $13, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $14, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $15, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: retq
%x0 = extractelement <16 x i8> %x, i8 %i0
%x1 = extractelement <16 x i8> %x, i8 %i1
@ -819,69 +815,68 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSE2: # %bb.0:
; SSE2-NEXT: movzbl (%rdi), %eax
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movzbl 15(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: leaq -{{[0-9]+}}(%rsp), %rcx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm8
; SSE2-NEXT: movzbl 14(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm15
; SSE2-NEXT: movzbl 13(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm9
; SSE2-NEXT: movzbl 12(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm3
; SSE2-NEXT: movzbl 11(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm10
; SSE2-NEXT: movzbl 10(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm7
; SSE2-NEXT: movzbl 9(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm11
; SSE2-NEXT: movzbl 8(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm6
; SSE2-NEXT: movzbl 7(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm12
; SSE2-NEXT: movzbl 6(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm5
; SSE2-NEXT: movzbl 5(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm13
; SSE2-NEXT: movzbl 4(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm4
; SSE2-NEXT: movzbl 3(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm14
; SSE2-NEXT: movzbl 2(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm1
; SSE2-NEXT: movzbl 1(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm2
; SSE2-NEXT: movzbl 15(%rdi), %ecx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSE2-NEXT: movd %ecx, %xmm8
; SSE2-NEXT: movzbl 14(%rdi), %ecx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSE2-NEXT: movd %ecx, %xmm15
; SSE2-NEXT: movzbl 13(%rdi), %ecx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSE2-NEXT: movd %ecx, %xmm9
; SSE2-NEXT: movzbl 12(%rdi), %ecx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSE2-NEXT: movd %ecx, %xmm3
; SSE2-NEXT: movzbl 11(%rdi), %ecx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSE2-NEXT: movd %ecx, %xmm10
; SSE2-NEXT: movzbl 10(%rdi), %ecx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSE2-NEXT: movd %ecx, %xmm7
; SSE2-NEXT: movzbl 9(%rdi), %ecx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSE2-NEXT: movd %ecx, %xmm11
; SSE2-NEXT: movzbl 8(%rdi), %ecx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSE2-NEXT: movd %ecx, %xmm6
; SSE2-NEXT: movzbl 7(%rdi), %ecx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSE2-NEXT: movd %ecx, %xmm12
; SSE2-NEXT: movzbl 6(%rdi), %ecx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSE2-NEXT: movd %ecx, %xmm5
; SSE2-NEXT: movzbl 5(%rdi), %ecx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSE2-NEXT: movd %ecx, %xmm13
; SSE2-NEXT: movzbl 4(%rdi), %ecx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSE2-NEXT: movd %ecx, %xmm4
; SSE2-NEXT: movzbl 3(%rdi), %ecx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSE2-NEXT: movd %ecx, %xmm14
; SSE2-NEXT: movzbl 2(%rdi), %ecx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: movzbl 1(%rdi), %ecx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%rcx), %eax
; SSE2-NEXT: movzbl -24(%rsp,%rax), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
@ -904,69 +899,68 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSSE3: # %bb.0:
; SSSE3-NEXT: movzbl (%rdi), %eax
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: movzbl 15(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: leaq -{{[0-9]+}}(%rsp), %rcx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm8
; SSSE3-NEXT: movzbl 14(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm15
; SSSE3-NEXT: movzbl 13(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm9
; SSSE3-NEXT: movzbl 12(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm3
; SSSE3-NEXT: movzbl 11(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm10
; SSSE3-NEXT: movzbl 10(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm7
; SSSE3-NEXT: movzbl 9(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm11
; SSSE3-NEXT: movzbl 8(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm6
; SSSE3-NEXT: movzbl 7(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm12
; SSSE3-NEXT: movzbl 6(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm5
; SSSE3-NEXT: movzbl 5(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm13
; SSSE3-NEXT: movzbl 4(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm4
; SSSE3-NEXT: movzbl 3(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm14
; SSSE3-NEXT: movzbl 2(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm1
; SSSE3-NEXT: movzbl 1(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm2
; SSSE3-NEXT: movzbl 15(%rdi), %ecx
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSSE3-NEXT: movd %ecx, %xmm8
; SSSE3-NEXT: movzbl 14(%rdi), %ecx
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSSE3-NEXT: movd %ecx, %xmm15
; SSSE3-NEXT: movzbl 13(%rdi), %ecx
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSSE3-NEXT: movd %ecx, %xmm9
; SSSE3-NEXT: movzbl 12(%rdi), %ecx
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSSE3-NEXT: movd %ecx, %xmm3
; SSSE3-NEXT: movzbl 11(%rdi), %ecx
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSSE3-NEXT: movd %ecx, %xmm10
; SSSE3-NEXT: movzbl 10(%rdi), %ecx
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSSE3-NEXT: movd %ecx, %xmm7
; SSSE3-NEXT: movzbl 9(%rdi), %ecx
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSSE3-NEXT: movd %ecx, %xmm11
; SSSE3-NEXT: movzbl 8(%rdi), %ecx
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSSE3-NEXT: movd %ecx, %xmm6
; SSSE3-NEXT: movzbl 7(%rdi), %ecx
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSSE3-NEXT: movd %ecx, %xmm12
; SSSE3-NEXT: movzbl 6(%rdi), %ecx
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSSE3-NEXT: movd %ecx, %xmm5
; SSSE3-NEXT: movzbl 5(%rdi), %ecx
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSSE3-NEXT: movd %ecx, %xmm13
; SSSE3-NEXT: movzbl 4(%rdi), %ecx
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSSE3-NEXT: movd %ecx, %xmm4
; SSSE3-NEXT: movzbl 3(%rdi), %ecx
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSSE3-NEXT: movd %ecx, %xmm14
; SSSE3-NEXT: movzbl 2(%rdi), %ecx
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: movzbl 1(%rdi), %ecx
; SSSE3-NEXT: andl $15, %ecx
; SSSE3-NEXT: movzbl -24(%rsp,%rcx), %ecx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%rcx), %eax
; SSSE3-NEXT: movzbl -24(%rsp,%rax), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
@ -990,54 +984,53 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSE41-NEXT: movzbl (%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE41-NEXT: leaq -{{[0-9]+}}(%rsp), %rcx
; SSE41-NEXT: movzbl (%rax,%rcx), %eax
; SSE41-NEXT: movzbl -24(%rsp,%rax), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: movzbl 1(%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $1, (%rax,%rcx), %xmm0
; SSE41-NEXT: pinsrb $1, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl 2(%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $2, (%rax,%rcx), %xmm0
; SSE41-NEXT: pinsrb $2, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl 3(%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $3, (%rax,%rcx), %xmm0
; SSE41-NEXT: pinsrb $3, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl 4(%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $4, (%rax,%rcx), %xmm0
; SSE41-NEXT: pinsrb $4, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl 5(%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $5, (%rax,%rcx), %xmm0
; SSE41-NEXT: pinsrb $5, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl 6(%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $6, (%rax,%rcx), %xmm0
; SSE41-NEXT: pinsrb $6, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl 7(%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $7, (%rax,%rcx), %xmm0
; SSE41-NEXT: pinsrb $7, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl 8(%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $8, (%rax,%rcx), %xmm0
; SSE41-NEXT: pinsrb $8, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl 9(%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $9, (%rax,%rcx), %xmm0
; SSE41-NEXT: pinsrb $9, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl 10(%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $10, (%rax,%rcx), %xmm0
; SSE41-NEXT: pinsrb $10, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl 11(%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $11, (%rax,%rcx), %xmm0
; SSE41-NEXT: pinsrb $11, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl 12(%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $12, (%rax,%rcx), %xmm0
; SSE41-NEXT: pinsrb $12, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl 13(%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $13, (%rax,%rcx), %xmm0
; SSE41-NEXT: pinsrb $13, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl 14(%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $14, (%rax,%rcx), %xmm0
; SSE41-NEXT: pinsrb $14, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: movzbl 15(%rdi), %eax
; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: pinsrb $15, (%rax,%rcx), %xmm0
; SSE41-NEXT: pinsrb $15, -24(%rsp,%rax), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
@ -1045,54 +1038,53 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; AVX-NEXT: movzbl (%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: movzbl (%rax,%rcx), %eax
; AVX-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: movzbl 1(%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $1, (%rax,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $1, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl 2(%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $2, (%rax,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $2, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl 3(%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $3, (%rax,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $3, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl 4(%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $4, (%rax,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $4, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl 5(%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $5, (%rax,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $5, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl 6(%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $6, (%rax,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $6, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl 7(%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $7, (%rax,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $7, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl 8(%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $8, (%rax,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $8, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl 9(%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $9, (%rax,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $9, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl 10(%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $10, (%rax,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $10, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl 11(%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $11, (%rax,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $11, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl 12(%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $12, (%rax,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $12, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl 13(%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $13, (%rax,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $13, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl 14(%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $14, (%rax,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $14, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: movzbl 15(%rdi), %eax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: vpinsrb $15, (%rax,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $15, -24(%rsp,%rax), %xmm0, %xmm0
; AVX-NEXT: retq
%p0 = getelementptr inbounds i8, i8* %i, i64 0
%p1 = getelementptr inbounds i8, i8* %i, i64 1