[DAGCombiner] fold sext into decrement

This is a sibling to rL357178 that I noticed we'd hit if we chose
an alternate transform in D59818.

  %z = zext i8 %x to i32
  %dec = add i32 %z, -1
  %r = sext i32 %dec to i64
  =>
  %z2 = zext i8 %x to i64
  %r = add i64 %z2, -1

https://rise4fun.com/Alive/kPP

The x86 vector diffs show a slight regression, so there's a chance
that we should limit this and the previous transform to scalars.

But given that we allowed vectors before, I'm matching that behavior
here. We should change both transforms together if that's the right
thing to do.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357254 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Sanjay Patel 2019-03-29 13:49:08 +00:00
parent 03b3201cc2
commit 2c4f9d155d
3 changed files with 22 additions and 13 deletions

View File

@ -9039,6 +9039,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
}
// Eliminate this sign extend by doing a decrement in the destination type:
// sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
}
return SDValue();
}

View File

@ -184,7 +184,6 @@ define i32 @zext_decrement_sext(i8 %x) {
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: decl %eax
; X64-NEXT: cwtl
; X64-NEXT: retq
%z = zext i8 %x to i16
%dec = add i16 %z, -1

View File

@ -6458,28 +6458,29 @@ define <8 x i32> @zext_decremenet_sext(<8 x i8> %x) {
; AVX1-LABEL: zext_decremenet_sext:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_decremenet_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_decremenet_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; X32-SSE2-LABEL: zext_decremenet_sext: