mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-31 20:44:18 +00:00
[X86] Legalize (i64 (bitcast (v64i1 X))) on 32-bit targets by extracting to v32i1 and bitcasting to i32.
This saves a trip through memory and seems to open up other combining opportunities. llvm-svn: 324056
This commit is contained in:
parent
b22c1d29bc
commit
5570e03b21
@ -24953,6 +24953,23 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
||||
EVT DstVT = N->getValueType(0);
|
||||
EVT SrcVT = N->getOperand(0).getValueType();
|
||||
|
||||
// If this is a bitcast from a v64i1 k-register to an i64 on a 32-bit target
|
||||
// we can split using the k-register rather than memory.
|
||||
if (SrcVT == MVT::v64i1 && DstVT == MVT::i64 && Subtarget.hasBWI()) {
|
||||
assert(!Subtarget.is64Bit() && "Expected 32-bit mode");
|
||||
SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v32i1,
|
||||
N->getOperand(0),
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
Lo = DAG.getBitcast(MVT::i32, Lo);
|
||||
SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v32i1,
|
||||
N->getOperand(0),
|
||||
DAG.getIntPtrConstant(32, dl));
|
||||
Hi = DAG.getBitcast(MVT::i32, Hi);
|
||||
SDValue Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
|
||||
Results.push_back(Res);
|
||||
return;
|
||||
}
|
||||
|
||||
if (SrcVT != MVT::f64 ||
|
||||
(DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8))
|
||||
return;
|
||||
|
@ -7,46 +7,30 @@
|
||||
define x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2, <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5, <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8, <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11, <64 x i1> %x12) {
|
||||
; X32-LABEL: test_argv64i1:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: pushl %ebp
|
||||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: andl $-8, %esp
|
||||
; X32-NEXT: subl $16, %esp
|
||||
; X32-NEXT: kmovd %edx, %k0
|
||||
; X32-NEXT: kmovd %edi, %k1
|
||||
; X32-NEXT: kunpckdq %k0, %k1, %k0
|
||||
; X32-NEXT: kmovd %eax, %k1
|
||||
; X32-NEXT: kmovd %ecx, %k2
|
||||
; X32-NEXT: kunpckdq %k1, %k2, %k1
|
||||
; X32-NEXT: kmovq %k1, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: kmovq %k0, (%esp)
|
||||
; X32-NEXT: addl (%esp), %eax
|
||||
; X32-NEXT: addl %edx, %eax
|
||||
; X32-NEXT: adcl %edi, %ecx
|
||||
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: addl 8(%ebp), %eax
|
||||
; X32-NEXT: adcl 12(%ebp), %ecx
|
||||
; X32-NEXT: addl 16(%ebp), %eax
|
||||
; X32-NEXT: adcl 20(%ebp), %ecx
|
||||
; X32-NEXT: addl 24(%ebp), %eax
|
||||
; X32-NEXT: adcl 28(%ebp), %ecx
|
||||
; X32-NEXT: addl 32(%ebp), %eax
|
||||
; X32-NEXT: adcl 36(%ebp), %ecx
|
||||
; X32-NEXT: addl 40(%ebp), %eax
|
||||
; X32-NEXT: adcl 44(%ebp), %ecx
|
||||
; X32-NEXT: addl 48(%ebp), %eax
|
||||
; X32-NEXT: adcl 52(%ebp), %ecx
|
||||
; X32-NEXT: addl 56(%ebp), %eax
|
||||
; X32-NEXT: adcl 60(%ebp), %ecx
|
||||
; X32-NEXT: addl 64(%ebp), %eax
|
||||
; X32-NEXT: adcl 68(%ebp), %ecx
|
||||
; X32-NEXT: addl 72(%ebp), %eax
|
||||
; X32-NEXT: adcl 76(%ebp), %ecx
|
||||
; X32-NEXT: addl 80(%ebp), %eax
|
||||
; X32-NEXT: adcl 84(%ebp), %ecx
|
||||
; X32-NEXT: addl 88(%ebp), %eax
|
||||
; X32-NEXT: adcl 92(%ebp), %ecx
|
||||
; X32-NEXT: movl %ebp, %esp
|
||||
; X32-NEXT: popl %ebp
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; WIN64-LABEL: test_argv64i1:
|
||||
|
@ -17,11 +17,12 @@ define i64 @test_mm512_kunpackd(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C,
|
||||
; X32-NEXT: vmovdqa64 136(%ebp), %zmm3
|
||||
; X32-NEXT: vpcmpneqb %zmm0, %zmm1, %k0
|
||||
; X32-NEXT: vpcmpneqb 8(%ebp), %zmm2, %k1
|
||||
; X32-NEXT: kunpckdq %k0, %k1, %k1
|
||||
; X32-NEXT: vpcmpneqb 72(%ebp), %zmm3, %k0 {%k1}
|
||||
; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X32-NEXT: vpcmpneqb 72(%ebp), %zmm3, %k2
|
||||
; X32-NEXT: kandd %k0, %k2, %k0
|
||||
; X32-NEXT: kmovd %k0, %eax
|
||||
; X32-NEXT: kshiftrq $32, %k2, %k0
|
||||
; X32-NEXT: kandd %k1, %k0, %k0
|
||||
; X32-NEXT: kmovd %k0, %edx
|
||||
; X32-NEXT: movl %ebp, %esp
|
||||
; X32-NEXT: popl %ebp
|
||||
; X32-NEXT: vzeroupper
|
||||
@ -1647,19 +1648,10 @@ define <8 x i64> @test_mm512_maskz_unpacklo_epi16(i32 %a0, <8 x i64> %a1, <8 x i
|
||||
define i64 @test_mm512_test_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) {
|
||||
; X32-LABEL: test_mm512_test_epi8_mask:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: pushl %ebp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: .cfi_offset %ebp, -8
|
||||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; X32-NEXT: andl $-8, %esp
|
||||
; X32-NEXT: subl $8, %esp
|
||||
; X32-NEXT: vptestmb %zmm0, %zmm1, %k0
|
||||
; X32-NEXT: kmovq %k0, (%esp)
|
||||
; X32-NEXT: movl (%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X32-NEXT: movl %ebp, %esp
|
||||
; X32-NEXT: popl %ebp
|
||||
; X32-NEXT: kshiftrq $32, %k0, %k1
|
||||
; X32-NEXT: kmovd %k0, %eax
|
||||
; X32-NEXT: kmovd %k1, %edx
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
@ -1680,18 +1672,13 @@ entry:
|
||||
define i64 @test_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
|
||||
; X32-LABEL: test_mm512_mask_test_epi8_mask:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: pushl %ebp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: .cfi_offset %ebp, -8
|
||||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; X32-NEXT: pushl %ebx
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: pushl %esi
|
||||
; X32-NEXT: andl $-8, %esp
|
||||
; X32-NEXT: subl $8, %esp
|
||||
; X32-NEXT: .cfi_offset %esi, -16
|
||||
; X32-NEXT: .cfi_offset %ebx, -12
|
||||
; X32-NEXT: movl 8(%ebp), %eax
|
||||
; X32-NEXT: .cfi_def_cfa_offset 12
|
||||
; X32-NEXT: .cfi_offset %esi, -12
|
||||
; X32-NEXT: .cfi_offset %ebx, -8
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: kmovd %eax, %k0
|
||||
; X32-NEXT: kshiftrq $1, %k0, %k1
|
||||
; X32-NEXT: movl %eax, %ecx
|
||||
@ -1798,7 +1785,7 @@ define i64 @test_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %
|
||||
; X32-NEXT: movl %ecx, %ebx
|
||||
; X32-NEXT: shrb $2, %bl
|
||||
; X32-NEXT: kmovd %ebx, %k7
|
||||
; X32-NEXT: movl 12(%ebp), %ebx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X32-NEXT: kshiftlq $63, %k1, %k1
|
||||
; X32-NEXT: kshiftrq $53, %k1, %k1
|
||||
; X32-NEXT: kxorq %k1, %k0, %k0
|
||||
@ -2211,13 +2198,11 @@ define i64 @test_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %
|
||||
; X32-NEXT: kshiftlq $63, %k1, %k1
|
||||
; X32-NEXT: korq %k1, %k0, %k1
|
||||
; X32-NEXT: vptestmb %zmm0, %zmm1, %k0 {%k1}
|
||||
; X32-NEXT: kmovq %k0, (%esp)
|
||||
; X32-NEXT: movl (%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X32-NEXT: leal -8(%ebp), %esp
|
||||
; X32-NEXT: kshiftrq $32, %k0, %k1
|
||||
; X32-NEXT: kmovd %k0, %eax
|
||||
; X32-NEXT: kmovd %k1, %edx
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: popl %ebx
|
||||
; X32-NEXT: popl %ebp
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
@ -2289,19 +2274,10 @@ entry:
|
||||
define i64 @test_mm512_testn_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) {
|
||||
; X32-LABEL: test_mm512_testn_epi8_mask:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: pushl %ebp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: .cfi_offset %ebp, -8
|
||||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; X32-NEXT: andl $-8, %esp
|
||||
; X32-NEXT: subl $8, %esp
|
||||
; X32-NEXT: vptestnmb %zmm0, %zmm1, %k0
|
||||
; X32-NEXT: kmovq %k0, (%esp)
|
||||
; X32-NEXT: movl (%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X32-NEXT: movl %ebp, %esp
|
||||
; X32-NEXT: popl %ebp
|
||||
; X32-NEXT: kshiftrq $32, %k0, %k1
|
||||
; X32-NEXT: kmovd %k0, %eax
|
||||
; X32-NEXT: kmovd %k1, %edx
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
@ -2322,18 +2298,13 @@ entry:
|
||||
define i64 @test_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
|
||||
; X32-LABEL: test_mm512_mask_testn_epi8_mask:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: pushl %ebp
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: .cfi_offset %ebp, -8
|
||||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; X32-NEXT: pushl %ebx
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: pushl %esi
|
||||
; X32-NEXT: andl $-8, %esp
|
||||
; X32-NEXT: subl $8, %esp
|
||||
; X32-NEXT: .cfi_offset %esi, -16
|
||||
; X32-NEXT: .cfi_offset %ebx, -12
|
||||
; X32-NEXT: movl 8(%ebp), %eax
|
||||
; X32-NEXT: .cfi_def_cfa_offset 12
|
||||
; X32-NEXT: .cfi_offset %esi, -12
|
||||
; X32-NEXT: .cfi_offset %ebx, -8
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: kmovd %eax, %k0
|
||||
; X32-NEXT: kshiftrq $1, %k0, %k1
|
||||
; X32-NEXT: movl %eax, %ecx
|
||||
@ -2440,7 +2411,7 @@ define i64 @test_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64>
|
||||
; X32-NEXT: movl %ecx, %ebx
|
||||
; X32-NEXT: shrb $2, %bl
|
||||
; X32-NEXT: kmovd %ebx, %k7
|
||||
; X32-NEXT: movl 12(%ebp), %ebx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X32-NEXT: kshiftlq $63, %k1, %k1
|
||||
; X32-NEXT: kshiftrq $53, %k1, %k1
|
||||
; X32-NEXT: kxorq %k1, %k0, %k0
|
||||
@ -2853,13 +2824,11 @@ define i64 @test_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64>
|
||||
; X32-NEXT: kshiftlq $63, %k1, %k1
|
||||
; X32-NEXT: korq %k1, %k0, %k1
|
||||
; X32-NEXT: vptestnmb %zmm0, %zmm1, %k0 {%k1}
|
||||
; X32-NEXT: kmovq %k0, (%esp)
|
||||
; X32-NEXT: movl (%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X32-NEXT: leal -8(%ebp), %esp
|
||||
; X32-NEXT: kshiftrq $32, %k0, %k1
|
||||
; X32-NEXT: kmovd %k0, %eax
|
||||
; X32-NEXT: kmovd %k1, %edx
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: popl %ebx
|
||||
; X32-NEXT: popl %ebp
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user