[DAGCOmbine] Ensure that (brcond (setcc ...)) is handled in a canonical manner.

Summary:
There are transformation that change setcc into other constructs, and transform that try to reconstruct a setcc from the brcond condition. Depending on what order these transform are done, the end result differs.

Most of the time, it is preferable to get a setcc as a brcond argument (and this is why brcond try to recreate the setcc in the first place) so we ensure this is done every time by also doing it at the setcc level when the only user is a brcond.

Reviewers: spatel, hfinkel, niravd, craig.topper

Subscribers: nhaehnle, llvm-commits

Differential Revision: https://reviews.llvm.org/D41235

llvm-svn: 325892
This commit is contained in:
Amaury Sechet 2018-02-23 11:50:42 +00:00
parent 3ff1988684
commit b89f706ba3
7 changed files with 90 additions and 108 deletions

View File

@ -415,7 +415,8 @@ namespace {
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
const SDLoc &DL);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, bool foldBooleans = true);
const SDLoc &DL, bool foldBooleans);
SDValue rebuildSetCC(SDValue N);
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC) const;
@ -7157,9 +7158,33 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
}
SDValue DAGCombiner::visitSETCC(SDNode *N) {
return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
cast<CondCodeSDNode>(N->getOperand(2))->get(),
SDLoc(N));
// setcc is very commonly used as an argument to brcond. This pattern
// also lend itself to numerous combines and, as a result, it is desired
// we keep the argument to a brcond as a setcc as much as possible.
bool PreferSetCC =
N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
SDValue Combined = SimplifySetCC(
N->getValueType(0), N->getOperand(0), N->getOperand(1),
cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
if (!Combined)
return SDValue();
// If we prefer to have a setcc, and we don't, we'll try our best to
// recreate one using rebuildSetCC.
if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
SDValue NewSetCC = rebuildSetCC(Combined);
// We don't have anything interesting to combine to.
if (NewSetCC.getNode() == N)
return SDValue();
if (NewSetCC)
return NewSetCC;
}
return Combined;
}
SDValue DAGCombiner::visitSETCCE(SDNode *N) {
@ -11151,16 +11176,22 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
N1.getOperand(0), N1.getOperand(1), N2);
}
if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
(N1.getOperand(0).hasOneUse() &&
N1.getOperand(0).getOpcode() == ISD::SRL))) {
SDNode *Trunc = nullptr;
if (N1.getOpcode() == ISD::TRUNCATE) {
// Look pass the truncate.
Trunc = N1.getNode();
N1 = N1.getOperand(0);
}
if (N1.hasOneUse()) {
if (SDValue NewN1 = rebuildSetCC(N1))
return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
}
return SDValue();
}
SDValue DAGCombiner::rebuildSetCC(SDValue N) {
if (N.getOpcode() == ISD::SRL ||
(N.getOpcode() == ISD::TRUNCATE &&
(N.getOperand(0).hasOneUse() &&
N.getOperand(0).getOpcode() == ISD::SRL))) {
// Look pass the truncate.
if (N.getOpcode() == ISD::TRUNCATE)
N = N.getOperand(0);
// Match this pattern so that we can generate simpler code:
//
@ -11179,74 +11210,42 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
// This applies only when the AND constant value has one bit set and the
// SRL constant is equal to the log2 of the AND constant. The back-end is
// smart enough to convert the result into a TEST/JMP sequence.
SDValue Op0 = N1.getOperand(0);
SDValue Op1 = N1.getOperand(1);
SDValue Op0 = N.getOperand(0);
SDValue Op1 = N.getOperand(1);
if (Op0.getOpcode() == ISD::AND &&
Op1.getOpcode() == ISD::Constant) {
if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
SDValue AndOp1 = Op0.getOperand(1);
if (AndOp1.getOpcode() == ISD::Constant) {
const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
if (AndConst.isPowerOf2() &&
cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
SDLoc DL(N);
SDValue SetCC =
DAG.getSetCC(DL,
getSetCCResultType(Op0.getValueType()),
Op0, DAG.getConstant(0, DL, Op0.getValueType()),
ISD::SETNE);
SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
MVT::Other, Chain, SetCC, N2);
// Don't add the new BRCond into the worklist or else SimplifySelectCC
// will convert it back to (X & C1) >> C2.
CombineTo(N, NewBRCond, false);
// Truncate is dead.
if (Trunc)
deleteAndRecombine(Trunc);
// Replace the uses of SRL with SETCC
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
deleteAndRecombine(N1.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
Op0, DAG.getConstant(0, DL, Op0.getValueType()),
ISD::SETNE);
}
}
}
if (Trunc)
// Restore N1 if the above transformation doesn't match.
N1 = N->getOperand(1);
}
// Transform br(xor(x, y)) -> br(x != y)
// Transform br(xor(xor(x,y), 1)) -> br (x == y)
if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
SDNode *TheXor = N1.getNode();
if (N.getOpcode() == ISD::XOR) {
SDNode *TheXor = N.getNode();
// Avoid missing important xor optimizations.
while (SDValue Tmp = visitXOR(TheXor)) {
// We don't have a XOR anymore, bail.
if (Tmp.getOpcode() != ISD::XOR)
return Tmp;
TheXor = Tmp.getNode();
}
SDValue Op0 = TheXor->getOperand(0);
SDValue Op1 = TheXor->getOperand(1);
if (Op0.getOpcode() == Op1.getOpcode()) {
// Avoid missing important xor optimizations.
if (SDValue Tmp = visitXOR(TheXor)) {
if (Tmp.getNode() != TheXor) {
DEBUG(dbgs() << "\nReplacing.8 ";
TheXor->dump(&DAG);
dbgs() << "\nWith: ";
Tmp.getNode()->dump(&DAG);
dbgs() << '\n');
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
deleteAndRecombine(TheXor);
return DAG.getNode(ISD::BRCOND, SDLoc(N),
MVT::Other, Chain, Tmp, N2);
}
// visitXOR has changed XOR's operands or replaced the XOR completely,
// bail out.
return SDValue(N, 0);
}
}
if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
bool Equal = false;
@ -11256,19 +11255,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Equal = true;
}
EVT SetCCVT = N1.getValueType();
EVT SetCCVT = N.getValueType();
if (LegalTypes)
SetCCVT = getSetCCResultType(SetCCVT);
SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
SetCCVT,
Op0, Op1,
Equal ? ISD::SETEQ : ISD::SETNE);
// Replace the uses of XOR with SETCC
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
deleteAndRecombine(N1.getNode());
return DAG.getNode(ISD::BRCOND, SDLoc(N),
MVT::Other, Chain, SetCC, N2);
return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
Equal ? ISD::SETEQ : ISD::SETNE);
}
}

View File

@ -2800,6 +2800,8 @@ def: Pat<(brcond (not I1:$Pu), bb:$dst),
(J2_jumpf I1:$Pu, bb:$dst)>;
def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst),
(J2_jumpf I1:$Pu, bb:$dst)>;
def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst),
(J2_jumpf I1:$Pu, bb:$dst)>;
def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst),
(J2_jumpt I1:$Pu, bb:$dst)>;

View File

@ -7,7 +7,6 @@ declare i1 @llvm.amdgcn.class.f32(float, i32)
; GCN-LABEL: {{^}}vcc_shrink_vcc_def:
; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @vcc_shrink_vcc_def(float %arg, i32 %arg1, float %arg2, i32 %arg3) {
bb0:
%tmp = icmp sgt i32 %arg1, 4
@ -34,7 +33,6 @@ bb2:
; GCN-LABEL: {{^}}preserve_condition_undef_flag:
; GCN-NOT: vcc
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) {
bb0:
%tmp = icmp sgt i32 %arg1, 4

View File

@ -397,9 +397,9 @@ endif:
}
; FUNC-LABEL: setcc-i1-and-xor
; GCN-DAG: v_cmp_ge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; GCN-DAG: v_cmp_le_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
; GCN: s_and_b64 s[2:3], [[A]], [[B]]
; GCN-DAG: v_cmp_nge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; GCN-DAG: v_cmp_nle_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
; GCN: s_or_b64 s[2:3], [[A]], [[B]]
define amdgpu_kernel void @setcc-i1-and-xor(i32 addrspace(1)* %out, float %cond) #0 {
bb0:
%tmp5 = fcmp oge float %cond, 0.000000e+00

View File

@ -14,8 +14,8 @@ define i32 @and_sink1(i32 %a, i1 %c) {
; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.1: # %bb0
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl $0, A
; CHECK-NEXT: testb $4, %al
; CHECK-NEXT: movl $0, A
; CHECK-NEXT: jne .LBB0_3
; CHECK-NEXT: # %bb.2: # %bb1
; CHECK-NEXT: movl $1, %eax
@ -61,8 +61,8 @@ define i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
; CHECK-NEXT: je .LBB1_5
; CHECK-NEXT: # %bb.3: # %bb1
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movl $0, C
; CHECK-NEXT: testb $4, %cl
; CHECK-NEXT: movl $0, C
; CHECK-NEXT: jne .LBB1_2
; CHECK-NEXT: # %bb.4: # %bb2
; CHECK-NEXT: movl $1, %eax

View File

@ -1146,12 +1146,9 @@ b:
define void @and32_imm_br() nounwind {
; CHECK-LABEL: and32_imm_br:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl $-2147483648, %eax # encoding: [0xb8,0x00,0x00,0x00,0x80]
; CHECK-NEXT: andl $-2147483648, {{.*}}(%rip) # encoding: [0x81,0x25,A,A,A,A,0x00,0x00,0x00,0x80]
; CHECK-NEXT: # fixup A - offset: 2, value: g32-8, kind: reloc_riprel_4byte
; CHECK-NEXT: # imm = 0x80000000
; CHECK-NEXT: andl {{.*}}(%rip), %eax # encoding: [0x23,0x05,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: g32-4, kind: reloc_riprel_4byte
; CHECK-NEXT: movl %eax, {{.*}}(%rip) # encoding: [0x89,0x05,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: g32-4, kind: reloc_riprel_4byte
; CHECK-NEXT: jne .LBB35_2 # encoding: [0x75,A]
; CHECK-NEXT: # fixup A - offset: 1, value: .LBB35_2-1, kind: FK_PCRel_1
; CHECK-NEXT: # %bb.1: # %a
@ -1244,13 +1241,9 @@ b:
define void @and16_imm_br() nounwind {
; CHECK-LABEL: and16_imm_br:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movzwl {{.*}}(%rip), %eax # encoding: [0x0f,0xb7,0x05,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: g16-4, kind: reloc_riprel_4byte
; CHECK-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; CHECK-NEXT: andw $-32768, {{.*}}(%rip) # encoding: [0x66,0x81,0x25,A,A,A,A,0x00,0x80]
; CHECK-NEXT: # fixup A - offset: 3, value: g16-6, kind: reloc_riprel_4byte
; CHECK-NEXT: # imm = 0x8000
; CHECK-NEXT: movw %ax, {{.*}}(%rip) # encoding: [0x66,0x89,0x05,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: g16-4, kind: reloc_riprel_4byte
; CHECK-NEXT: testw %ax, %ax # encoding: [0x66,0x85,0xc0]
; CHECK-NEXT: jne .LBB38_2 # encoding: [0x75,A]
; CHECK-NEXT: # fixup A - offset: 1, value: .LBB38_2-1, kind: FK_PCRel_1
; CHECK-NEXT: # %bb.1: # %a

View File

@ -19,11 +19,10 @@ define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind {
; JUMP1-LABEL: foo:
; JUMP1: # %bb.0: # %entry
; JUMP1-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; JUMP1-NEXT: sete %al
; JUMP1-NEXT: cmpl $5, {{[0-9]+}}(%esp)
; JUMP1-NEXT: setl %cl
; JUMP1-NEXT: orb %al, %cl
; JUMP1-NEXT: cmpb $1, %cl
; JUMP1-NEXT: setne %al
; JUMP1-NEXT: cmpl $4, {{[0-9]+}}(%esp)
; JUMP1-NEXT: setg %cl
; JUMP1-NEXT: testb %al, %cl
; JUMP1-NEXT: jne .LBB0_1
; JUMP1-NEXT: # %bb.2: # %cond_true
; JUMP1-NEXT: jmp bar # TAILCALL
@ -50,11 +49,10 @@ define void @unpredictable(i32 %X, i32 %Y, i32 %Z) nounwind {
; JUMP2-LABEL: unpredictable:
; JUMP2: # %bb.0: # %entry
; JUMP2-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; JUMP2-NEXT: sete %al
; JUMP2-NEXT: cmpl $5, {{[0-9]+}}(%esp)
; JUMP2-NEXT: setl %cl
; JUMP2-NEXT: orb %al, %cl
; JUMP2-NEXT: cmpb $1, %cl
; JUMP2-NEXT: setne %al
; JUMP2-NEXT: cmpl $4, {{[0-9]+}}(%esp)
; JUMP2-NEXT: setg %cl
; JUMP2-NEXT: testb %al, %cl
; JUMP2-NEXT: jne .LBB1_1
; JUMP2-NEXT: # %bb.2: # %cond_true
; JUMP2-NEXT: jmp bar # TAILCALL
@ -64,11 +62,10 @@ define void @unpredictable(i32 %X, i32 %Y, i32 %Z) nounwind {
; JUMP1-LABEL: unpredictable:
; JUMP1: # %bb.0: # %entry
; JUMP1-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; JUMP1-NEXT: sete %al
; JUMP1-NEXT: cmpl $5, {{[0-9]+}}(%esp)
; JUMP1-NEXT: setl %cl
; JUMP1-NEXT: orb %al, %cl
; JUMP1-NEXT: cmpb $1, %cl
; JUMP1-NEXT: setne %al
; JUMP1-NEXT: cmpl $4, {{[0-9]+}}(%esp)
; JUMP1-NEXT: setg %cl
; JUMP1-NEXT: testb %al, %cl
; JUMP1-NEXT: jne .LBB1_1
; JUMP1-NEXT: # %bb.2: # %cond_true
; JUMP1-NEXT: jmp bar # TAILCALL