[X86] Legalize (i64 (bitcast (v64i1 X))) on 32-bit targets by extracting to v32i1 and bitcasting to i32.

This saves a trip through memory and seems to open up other combining opportunities.
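
As an illustration of the new lowering (a scalar model, not code from this commit): on a 32-bit target an i64 result is produced as a lo/hi pair of i32 values, so the v64i1 mask can be split within the k-register file with extracts at element 0 and element 32 instead of a kmovq store and two reloads. A minimal C++ sketch, with a hypothetical function name:

#include <cstdint>

// Scalar model of the legalization: build the 64-bit result from two
// 32-bit halves instead of spilling the mask to memory and reloading.
uint64_t split_v64i1_bitcast(uint64_t mask) {
  uint32_t lo = static_cast<uint32_t>(mask);        // EXTRACT_SUBVECTOR index 0  -> kmovd
  uint32_t hi = static_cast<uint32_t>(mask >> 32);  // EXTRACT_SUBVECTOR index 32 -> kshiftrq + kmovd
  return (static_cast<uint64_t>(hi) << 32) | lo;    // combine the halves (ISD::BUILD_PAIR)
}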

llvm-svn: 324056
Craig Topper 2018-02-02 05:59:31 +00:00
parent b22c1d29bc
commit 5570e03b21
4 changed files with 488 additions and 490 deletions


@@ -24953,6 +24953,23 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
EVT DstVT = N->getValueType(0);
EVT SrcVT = N->getOperand(0).getValueType();
// If this is a bitcast from a v64i1 k-register to an i64 on a 32-bit target
// we can split using the k-register rather than memory.
if (SrcVT == MVT::v64i1 && DstVT == MVT::i64 && Subtarget.hasBWI()) {
assert(!Subtarget.is64Bit() && "Expected 32-bit mode");
SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v32i1,
N->getOperand(0),
DAG.getIntPtrConstant(0, dl));
Lo = DAG.getBitcast(MVT::i32, Lo);
SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v32i1,
N->getOperand(0),
DAG.getIntPtrConstant(32, dl));
Hi = DAG.getBitcast(MVT::i32, Hi);
SDValue Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
Results.push_back(Res);
return;
}
if (SrcVT != MVT::f64 ||
(DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8))
return;
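
A note on the guards above (my reading, not stated in the commit): v64i1 is only a legal mask type with AVX-512BW, hence the Subtarget.hasBWI() check, and ReplaceNodeResults is only invoked for illegal result types, so an i64 result reaching this code implies a 32-bit target, which is what the assert documents. A 64-bit target would handle this bitcast with a single kmovq.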


@@ -7,46 +7,30 @@
define x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2, <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5, <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8, <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11, <64 x i1> %x12) {
; X32-LABEL: test_argv64i1:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $16, %esp
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %edi, %k1
; X32-NEXT: kunpckdq %k0, %k1, %k0
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: kmovd %ecx, %k2
; X32-NEXT: kunpckdq %k1, %k2, %k1
; X32-NEXT: kmovq %k1, {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: kmovq %k0, (%esp)
; X32-NEXT: addl (%esp), %eax
; X32-NEXT: addl %edx, %eax
; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl 8(%ebp), %eax
; X32-NEXT: adcl 12(%ebp), %ecx
; X32-NEXT: addl 16(%ebp), %eax
; X32-NEXT: adcl 20(%ebp), %ecx
; X32-NEXT: addl 24(%ebp), %eax
; X32-NEXT: adcl 28(%ebp), %ecx
; X32-NEXT: addl 32(%ebp), %eax
; X32-NEXT: adcl 36(%ebp), %ecx
; X32-NEXT: addl 40(%ebp), %eax
; X32-NEXT: adcl 44(%ebp), %ecx
; X32-NEXT: addl 48(%ebp), %eax
; X32-NEXT: adcl 52(%ebp), %ecx
; X32-NEXT: addl 56(%ebp), %eax
; X32-NEXT: adcl 60(%ebp), %ecx
; X32-NEXT: addl 64(%ebp), %eax
; X32-NEXT: adcl 68(%ebp), %ecx
; X32-NEXT: addl 72(%ebp), %eax
; X32-NEXT: adcl 76(%ebp), %ecx
; X32-NEXT: addl 80(%ebp), %eax
; X32-NEXT: adcl 84(%ebp), %ecx
; X32-NEXT: addl 88(%ebp), %eax
; X32-NEXT: adcl 92(%ebp), %ecx
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv64i1:


@@ -17,11 +17,12 @@ define i64 @test_mm512_kunpackd(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C,
; X32-NEXT: vmovdqa64 136(%ebp), %zmm3
; X32-NEXT: vpcmpneqb %zmm0, %zmm1, %k0
; X32-NEXT: vpcmpneqb 8(%ebp), %zmm2, %k1
; X32-NEXT: kunpckdq %k0, %k1, %k1
; X32-NEXT: vpcmpneqb 72(%ebp), %zmm3, %k0 {%k1}
; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: vpcmpneqb 72(%ebp), %zmm3, %k2
; X32-NEXT: kandd %k0, %k2, %k0
; X32-NEXT: kmovd %k0, %eax
; X32-NEXT: kshiftrq $32, %k2, %k0
; X32-NEXT: kandd %k1, %k0, %k0
; X32-NEXT: kmovd %k0, %edx
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: vzeroupper
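
The test_mm512_kunpackd change above shows one of the "other combining opportunities" from the commit message: with the i64 bitcast split into halves, the masked compare becomes an unmasked compare followed by a kandd on each 32-bit half, and the kmovq spill disappears. A rough scalar model of the new sequence, with hypothetical names:

#include <cstdint>

// Scalar model of the updated CHECK lines: (cmp & mask) computed
// half-at-a-time, matching the kandd / kshiftrq / kmovd sequence.
uint64_t masked_compare_halves(uint64_t cmp, uint64_t mask) {
  uint32_t lo = static_cast<uint32_t>(cmp) & static_cast<uint32_t>(mask);              // kandd -> kmovd %eax
  uint32_t hi = static_cast<uint32_t>(cmp >> 32) & static_cast<uint32_t>(mask >> 32);  // kshiftrq $32; kandd -> kmovd %edx
  return (static_cast<uint64_t>(hi) << 32) | lo;
}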
@@ -1647,19 +1648,10 @@ define <8 x i64> @test_mm512_maskz_unpacklo_epi16(i32 %a0, <8 x i64> %a1, <8 x i
define i64 @test_mm512_test_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) {
; X32-LABEL: test_mm512_test_epi8_mask:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: vptestmb %zmm0, %zmm1, %k0
; X32-NEXT: kmovq %k0, (%esp)
; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: kshiftrq $32, %k0, %k1
; X32-NEXT: kmovd %k0, %eax
; X32-NEXT: kmovd %k1, %edx
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
@@ -1680,18 +1672,13 @@ entry:
define i64 @test_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X32-LABEL: test_mm512_mask_test_epi8_mask:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: .cfi_def_cfa_offset 12
; X32-NEXT: .cfi_offset %esi, -12
; X32-NEXT: .cfi_offset %ebx, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovd %eax, %k0
; X32-NEXT: kshiftrq $1, %k0, %k1
; X32-NEXT: movl %eax, %ecx
@@ -1798,7 +1785,7 @@ define i64 @test_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %
; X32-NEXT: movl %ecx, %ebx
; X32-NEXT: shrb $2, %bl
; X32-NEXT: kmovd %ebx, %k7
; X32-NEXT: movl 12(%ebp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $53, %k1, %k1
; X32-NEXT: kxorq %k1, %k0, %k0
@@ -2211,13 +2198,11 @@ define i64 @test_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: korq %k1, %k0, %k1
; X32-NEXT: vptestmb %zmm0, %zmm1, %k0 {%k1}
; X32-NEXT: kmovq %k0, (%esp)
; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: kshiftrq $32, %k0, %k1
; X32-NEXT: kmovd %k0, %eax
; X32-NEXT: kmovd %k1, %edx
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
@@ -2289,19 +2274,10 @@ entry:
define i64 @test_mm512_testn_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) {
; X32-LABEL: test_mm512_testn_epi8_mask:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: vptestnmb %zmm0, %zmm1, %k0
; X32-NEXT: kmovq %k0, (%esp)
; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: kshiftrq $32, %k0, %k1
; X32-NEXT: kmovd %k0, %eax
; X32-NEXT: kmovd %k1, %edx
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
@@ -2322,18 +2298,13 @@ entry:
define i64 @test_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X32-LABEL: test_mm512_mask_testn_epi8_mask:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: .cfi_def_cfa_offset 12
; X32-NEXT: .cfi_offset %esi, -12
; X32-NEXT: .cfi_offset %ebx, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovd %eax, %k0
; X32-NEXT: kshiftrq $1, %k0, %k1
; X32-NEXT: movl %eax, %ecx
@@ -2440,7 +2411,7 @@ define i64 @test_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64>
; X32-NEXT: movl %ecx, %ebx
; X32-NEXT: shrb $2, %bl
; X32-NEXT: kmovd %ebx, %k7
; X32-NEXT: movl 12(%ebp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $53, %k1, %k1
; X32-NEXT: kxorq %k1, %k0, %k0
@@ -2853,13 +2824,11 @@ define i64 @test_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64>
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: korq %k1, %k0, %k1
; X32-NEXT: vptestnmb %zmm0, %zmm1, %k0 {%k1}
; X32-NEXT: kmovq %k0, (%esp)
; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: kshiftrq $32, %k0, %k1
; X32-NEXT: kmovd %k0, %eax
; X32-NEXT: kmovd %k1, %edx
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;

File diff suppressed because it is too large.