From a316b89267e0a5545d4e0ccc13e30faeb7f31e2d Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 2 Apr 2020 12:08:56 -0700
Subject: [PATCH] [X86] Enable combineExtSetcc for vectors larger than 256
 bits when we've disabled 512 bit vectors.

The compares are going to be type legalized to 256 bits so we might as
well fold the extend.
---
 lib/Target/X86/X86ISelLowering.cpp         |  2 +-
 test/CodeGen/X86/min-legal-vector-width.ll | 40 ++++++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index eba4db960f5..ec1040e79d7 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -44762,7 +44762,7 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
 
   // We can only do this if the vector size in 256 bits or less.
   unsigned Size = VT.getSizeInBits();
-  if (Size > 256)
+  if (Size > 256 && Subtarget.useAVX512Regs())
     return SDValue();
 
   // Don't fold if the condition code can't be handled by PCMPEQ/PCMPGT since
diff --git a/test/CodeGen/X86/min-legal-vector-width.ll b/test/CodeGen/X86/min-legal-vector-width.ll
index 44ed30394a4..81de0ee4eba 100644
--- a/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/test/CodeGen/X86/min-legal-vector-width.ll
@@ -1600,3 +1600,43 @@ define i32 @v64i1_inline_asm() "min-legal-vector-width"="256" {
   %4 = load i32, i32* %1, align 4
   ret i32 %4
 }
+
+define void @cmp_v8i64_sext(<8 x i64>* %xptr, <8 x i64>* %yptr, <8 x i64>* %zptr) "min-legal-vector-width"="256" {
+; CHECK-LABEL: cmp_v8i64_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovdqa (%rsi), %ymm0
+; CHECK-NEXT:    vmovdqa 32(%rsi), %ymm1
+; CHECK-NEXT:    vpcmpgtq 32(%rdi), %ymm1, %ymm1
+; CHECK-NEXT:    vpcmpgtq (%rdi), %ymm0, %ymm0
+; CHECK-NEXT:    vmovdqa %ymm0, (%rdx)
+; CHECK-NEXT:    vmovdqa %ymm1, 32(%rdx)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %x = load <8 x i64>, <8 x i64>* %xptr
+  %y = load <8 x i64>, <8 x i64>* %yptr
+  %cmp = icmp slt <8 x i64> %x, %y
+  %ext = sext <8 x i1> %cmp to <8 x i64>
+  store <8 x i64> %ext, <8 x i64>* %zptr
+  ret void
+}
+
+define void @cmp_v8i64_zext(<8 x i64>* %xptr, <8 x i64>* %yptr, <8 x i64>* %zptr) "min-legal-vector-width"="256" {
+; CHECK-LABEL: cmp_v8i64_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovdqa (%rsi), %ymm0
+; CHECK-NEXT:    vmovdqa 32(%rsi), %ymm1
+; CHECK-NEXT:    vpcmpgtq 32(%rdi), %ymm1, %ymm1
+; CHECK-NEXT:    vpcmpgtq (%rdi), %ymm0, %ymm0
+; CHECK-NEXT:    vpsrlq $63, %ymm1, %ymm1
+; CHECK-NEXT:    vpsrlq $63, %ymm0, %ymm0
+; CHECK-NEXT:    vmovdqa %ymm0, (%rdx)
+; CHECK-NEXT:    vmovdqa %ymm1, 32(%rdx)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %x = load <8 x i64>, <8 x i64>* %xptr
+  %y = load <8 x i64>, <8 x i64>* %yptr
+  %cmp = icmp slt <8 x i64> %x, %y
+  %ext = zext <8 x i1> %cmp to <8 x i64>
+  store <8 x i64> %ext, <8 x i64>* %zptr
+  ret void
+}
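
As a reading aid, not part of the patch: below is a simplified sketch of the
fold this guard gates, assuming the usual X86ISelLowering.cpp context. Apart
from the combineExtSetcc signature, Size, and Subtarget.useAVX512Regs(),
which are visible in the first hunk, the names and structure are illustrative
assumptions rather than the exact upstream code.

// Illustrative sketch only -- not the exact upstream implementation.
// N is a SIGN_EXTEND or ZERO_EXTEND node whose operand is a vector SETCC.
static SDValue combineExtSetccSketch(SDNode *N, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (N0.getOpcode() != ISD::SETCC || !VT.isVector())
    return SDValue();

  // The guard this patch changes: with "min-legal-vector-width"="256",
  // useAVX512Regs() is false and type legalization will split a 512-bit
  // compare into two 256-bit ones anyway, so wider types no longer bail.
  unsigned Size = VT.getSizeInBits();
  if (Size > 256 && Subtarget.useAVX512Regs())
    return SDValue();

  // (The real function also rejects condition codes that PCMPEQ/PCMPGT
  // cannot handle, per the comment visible at the end of the first hunk.)

  // Re-emit the compare directly at the extended type. PCMPEQ/PCMPGT set
  // each lane to all-ones or all-zeros, which is exactly a sign-extended
  // i1, so the sext folds away entirely.
  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
  SDLoc DL(N);
  SDValue Res = DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1), CC);

  // For zext, keep only bit 0 of each lane; later combines turn this mask
  // into the vpsrlq $63 seen in the cmp_v8i64_zext test above.
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    Res = DAG.getNode(ISD::AND, DL, VT, Res, DAG.getConstant(1, DL, VT));
  return Res;
}

When 512-bit registers are actually in use, the early-out still fires, since
a wider-than-256-bit setcc under AVX512 produces a k-mask rather than an
all-ones vector, so the PCMPEQ/PCMPGT form of the fold does not apply there.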