mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-03 08:51:43 +00:00
[DAGCombine] Ensure that (brcond (setcc ...)) is handled in a canonical manner.
Summary: There are transformations that change a setcc into other constructs, and transformations that try to reconstruct a setcc from the brcond condition. Depending on the order in which these transformations are done, the end result differs. Most of the time, it is preferable to get a setcc as the brcond argument (and this is why brcond tries to recreate the setcc in the first place), so we ensure this is done every time by also doing it at the setcc level when the only user is a brcond. Reviewers: spatel, hfinkel, niravd, craig.topper Subscribers: nhaehnle, llvm-commits Differential Revision: https://reviews.llvm.org/D41235 llvm-svn: 325892
This commit is contained in:
parent
3ff1988684
commit
b89f706ba3
@ -415,7 +415,8 @@ namespace {
|
||||
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
|
||||
const SDLoc &DL);
|
||||
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
|
||||
const SDLoc &DL, bool foldBooleans = true);
|
||||
const SDLoc &DL, bool foldBooleans);
|
||||
SDValue rebuildSetCC(SDValue N);
|
||||
|
||||
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
|
||||
SDValue &CC) const;
|
||||
@ -7157,9 +7158,33 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
|
||||
}
|
||||
|
||||
SDValue DAGCombiner::visitSETCC(SDNode *N) {
|
||||
return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
|
||||
cast<CondCodeSDNode>(N->getOperand(2))->get(),
|
||||
SDLoc(N));
|
||||
// setcc is very commonly used as an argument to brcond. This pattern
|
||||
// also lend itself to numerous combines and, as a result, it is desired
|
||||
// we keep the argument to a brcond as a setcc as much as possible.
|
||||
bool PreferSetCC =
|
||||
N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
|
||||
|
||||
SDValue Combined = SimplifySetCC(
|
||||
N->getValueType(0), N->getOperand(0), N->getOperand(1),
|
||||
cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
|
||||
|
||||
if (!Combined)
|
||||
return SDValue();
|
||||
|
||||
// If we prefer to have a setcc, and we don't, we'll try our best to
|
||||
// recreate one using rebuildSetCC.
|
||||
if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
|
||||
SDValue NewSetCC = rebuildSetCC(Combined);
|
||||
|
||||
// We don't have anything interesting to combine to.
|
||||
if (NewSetCC.getNode() == N)
|
||||
return SDValue();
|
||||
|
||||
if (NewSetCC)
|
||||
return NewSetCC;
|
||||
}
|
||||
|
||||
return Combined;
|
||||
}
|
||||
|
||||
SDValue DAGCombiner::visitSETCCE(SDNode *N) {
|
||||
@ -11151,16 +11176,22 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
|
||||
N1.getOperand(0), N1.getOperand(1), N2);
|
||||
}
|
||||
|
||||
if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
|
||||
((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
|
||||
(N1.getOperand(0).hasOneUse() &&
|
||||
N1.getOperand(0).getOpcode() == ISD::SRL))) {
|
||||
SDNode *Trunc = nullptr;
|
||||
if (N1.getOpcode() == ISD::TRUNCATE) {
|
||||
// Look pass the truncate.
|
||||
Trunc = N1.getNode();
|
||||
N1 = N1.getOperand(0);
|
||||
}
|
||||
if (N1.hasOneUse()) {
|
||||
if (SDValue NewN1 = rebuildSetCC(N1))
|
||||
return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue DAGCombiner::rebuildSetCC(SDValue N) {
|
||||
if (N.getOpcode() == ISD::SRL ||
|
||||
(N.getOpcode() == ISD::TRUNCATE &&
|
||||
(N.getOperand(0).hasOneUse() &&
|
||||
N.getOperand(0).getOpcode() == ISD::SRL))) {
|
||||
// Look pass the truncate.
|
||||
if (N.getOpcode() == ISD::TRUNCATE)
|
||||
N = N.getOperand(0);
|
||||
|
||||
// Match this pattern so that we can generate simpler code:
|
||||
//
|
||||
@ -11179,74 +11210,42 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
|
||||
// This applies only when the AND constant value has one bit set and the
|
||||
// SRL constant is equal to the log2 of the AND constant. The back-end is
|
||||
// smart enough to convert the result into a TEST/JMP sequence.
|
||||
SDValue Op0 = N1.getOperand(0);
|
||||
SDValue Op1 = N1.getOperand(1);
|
||||
SDValue Op0 = N.getOperand(0);
|
||||
SDValue Op1 = N.getOperand(1);
|
||||
|
||||
if (Op0.getOpcode() == ISD::AND &&
|
||||
Op1.getOpcode() == ISD::Constant) {
|
||||
if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
|
||||
SDValue AndOp1 = Op0.getOperand(1);
|
||||
|
||||
if (AndOp1.getOpcode() == ISD::Constant) {
|
||||
const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
|
||||
|
||||
if (AndConst.isPowerOf2() &&
|
||||
cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
|
||||
cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
|
||||
SDLoc DL(N);
|
||||
SDValue SetCC =
|
||||
DAG.getSetCC(DL,
|
||||
getSetCCResultType(Op0.getValueType()),
|
||||
Op0, DAG.getConstant(0, DL, Op0.getValueType()),
|
||||
ISD::SETNE);
|
||||
|
||||
SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
|
||||
MVT::Other, Chain, SetCC, N2);
|
||||
// Don't add the new BRCond into the worklist or else SimplifySelectCC
|
||||
// will convert it back to (X & C1) >> C2.
|
||||
CombineTo(N, NewBRCond, false);
|
||||
// Truncate is dead.
|
||||
if (Trunc)
|
||||
deleteAndRecombine(Trunc);
|
||||
// Replace the uses of SRL with SETCC
|
||||
WorklistRemover DeadNodes(*this);
|
||||
DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
|
||||
deleteAndRecombine(N1.getNode());
|
||||
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
||||
return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
|
||||
Op0, DAG.getConstant(0, DL, Op0.getValueType()),
|
||||
ISD::SETNE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (Trunc)
|
||||
// Restore N1 if the above transformation doesn't match.
|
||||
N1 = N->getOperand(1);
|
||||
}
|
||||
|
||||
// Transform br(xor(x, y)) -> br(x != y)
|
||||
// Transform br(xor(xor(x,y), 1)) -> br (x == y)
|
||||
if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
|
||||
SDNode *TheXor = N1.getNode();
|
||||
if (N.getOpcode() == ISD::XOR) {
|
||||
SDNode *TheXor = N.getNode();
|
||||
|
||||
// Avoid missing important xor optimizations.
|
||||
while (SDValue Tmp = visitXOR(TheXor)) {
|
||||
// We don't have a XOR anymore, bail.
|
||||
if (Tmp.getOpcode() != ISD::XOR)
|
||||
return Tmp;
|
||||
|
||||
TheXor = Tmp.getNode();
|
||||
}
|
||||
|
||||
SDValue Op0 = TheXor->getOperand(0);
|
||||
SDValue Op1 = TheXor->getOperand(1);
|
||||
if (Op0.getOpcode() == Op1.getOpcode()) {
|
||||
// Avoid missing important xor optimizations.
|
||||
if (SDValue Tmp = visitXOR(TheXor)) {
|
||||
if (Tmp.getNode() != TheXor) {
|
||||
DEBUG(dbgs() << "\nReplacing.8 ";
|
||||
TheXor->dump(&DAG);
|
||||
dbgs() << "\nWith: ";
|
||||
Tmp.getNode()->dump(&DAG);
|
||||
dbgs() << '\n');
|
||||
WorklistRemover DeadNodes(*this);
|
||||
DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
|
||||
deleteAndRecombine(TheXor);
|
||||
return DAG.getNode(ISD::BRCOND, SDLoc(N),
|
||||
MVT::Other, Chain, Tmp, N2);
|
||||
}
|
||||
|
||||
// visitXOR has changed XOR's operands or replaced the XOR completely,
|
||||
// bail out.
|
||||
return SDValue(N, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
|
||||
bool Equal = false;
|
||||
@ -11256,19 +11255,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
|
||||
Equal = true;
|
||||
}
|
||||
|
||||
EVT SetCCVT = N1.getValueType();
|
||||
EVT SetCCVT = N.getValueType();
|
||||
if (LegalTypes)
|
||||
SetCCVT = getSetCCResultType(SetCCVT);
|
||||
SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
|
||||
SetCCVT,
|
||||
Op0, Op1,
|
||||
Equal ? ISD::SETEQ : ISD::SETNE);
|
||||
// Replace the uses of XOR with SETCC
|
||||
WorklistRemover DeadNodes(*this);
|
||||
DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
|
||||
deleteAndRecombine(N1.getNode());
|
||||
return DAG.getNode(ISD::BRCOND, SDLoc(N),
|
||||
MVT::Other, Chain, SetCC, N2);
|
||||
return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
|
||||
Equal ? ISD::SETEQ : ISD::SETNE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2800,6 +2800,8 @@ def: Pat<(brcond (not I1:$Pu), bb:$dst),
|
||||
(J2_jumpf I1:$Pu, bb:$dst)>;
|
||||
def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst),
|
||||
(J2_jumpf I1:$Pu, bb:$dst)>;
|
||||
def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst),
|
||||
(J2_jumpf I1:$Pu, bb:$dst)>;
|
||||
def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst),
|
||||
(J2_jumpt I1:$Pu, bb:$dst)>;
|
||||
|
||||
|
@ -7,7 +7,6 @@ declare i1 @llvm.amdgcn.class.f32(float, i32)
|
||||
; GCN-LABEL: {{^}}vcc_shrink_vcc_def:
|
||||
; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
|
||||
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
|
||||
; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
|
||||
define amdgpu_kernel void @vcc_shrink_vcc_def(float %arg, i32 %arg1, float %arg2, i32 %arg3) {
|
||||
bb0:
|
||||
%tmp = icmp sgt i32 %arg1, 4
|
||||
@ -34,7 +33,6 @@ bb2:
|
||||
; GCN-LABEL: {{^}}preserve_condition_undef_flag:
|
||||
; GCN-NOT: vcc
|
||||
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
|
||||
; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
|
||||
define amdgpu_kernel void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) {
|
||||
bb0:
|
||||
%tmp = icmp sgt i32 %arg1, 4
|
||||
|
@ -397,9 +397,9 @@ endif:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: setcc-i1-and-xor
|
||||
; GCN-DAG: v_cmp_ge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
|
||||
; GCN-DAG: v_cmp_le_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
|
||||
; GCN: s_and_b64 s[2:3], [[A]], [[B]]
|
||||
; GCN-DAG: v_cmp_nge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
|
||||
; GCN-DAG: v_cmp_nle_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
|
||||
; GCN: s_or_b64 s[2:3], [[A]], [[B]]
|
||||
define amdgpu_kernel void @setcc-i1-and-xor(i32 addrspace(1)* %out, float %cond) #0 {
|
||||
bb0:
|
||||
%tmp5 = fcmp oge float %cond, 0.000000e+00
|
||||
|
@ -14,8 +14,8 @@ define i32 @and_sink1(i32 %a, i1 %c) {
|
||||
; CHECK-NEXT: je .LBB0_3
|
||||
; CHECK-NEXT: # %bb.1: # %bb0
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl $0, A
|
||||
; CHECK-NEXT: testb $4, %al
|
||||
; CHECK-NEXT: movl $0, A
|
||||
; CHECK-NEXT: jne .LBB0_3
|
||||
; CHECK-NEXT: # %bb.2: # %bb1
|
||||
; CHECK-NEXT: movl $1, %eax
|
||||
@ -61,8 +61,8 @@ define i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
|
||||
; CHECK-NEXT: je .LBB1_5
|
||||
; CHECK-NEXT: # %bb.3: # %bb1
|
||||
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
|
||||
; CHECK-NEXT: movl $0, C
|
||||
; CHECK-NEXT: testb $4, %cl
|
||||
; CHECK-NEXT: movl $0, C
|
||||
; CHECK-NEXT: jne .LBB1_2
|
||||
; CHECK-NEXT: # %bb.4: # %bb2
|
||||
; CHECK-NEXT: movl $1, %eax
|
||||
|
@ -1146,12 +1146,9 @@ b:
|
||||
define void @and32_imm_br() nounwind {
|
||||
; CHECK-LABEL: and32_imm_br:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movl $-2147483648, %eax # encoding: [0xb8,0x00,0x00,0x00,0x80]
|
||||
; CHECK-NEXT: andl $-2147483648, {{.*}}(%rip) # encoding: [0x81,0x25,A,A,A,A,0x00,0x00,0x00,0x80]
|
||||
; CHECK-NEXT: # fixup A - offset: 2, value: g32-8, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: # imm = 0x80000000
|
||||
; CHECK-NEXT: andl {{.*}}(%rip), %eax # encoding: [0x23,0x05,A,A,A,A]
|
||||
; CHECK-NEXT: # fixup A - offset: 2, value: g32-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: movl %eax, {{.*}}(%rip) # encoding: [0x89,0x05,A,A,A,A]
|
||||
; CHECK-NEXT: # fixup A - offset: 2, value: g32-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: jne .LBB35_2 # encoding: [0x75,A]
|
||||
; CHECK-NEXT: # fixup A - offset: 1, value: .LBB35_2-1, kind: FK_PCRel_1
|
||||
; CHECK-NEXT: # %bb.1: # %a
|
||||
@ -1244,13 +1241,9 @@ b:
|
||||
define void @and16_imm_br() nounwind {
|
||||
; CHECK-LABEL: and16_imm_br:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movzwl {{.*}}(%rip), %eax # encoding: [0x0f,0xb7,0x05,A,A,A,A]
|
||||
; CHECK-NEXT: # fixup A - offset: 3, value: g16-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
|
||||
; CHECK-NEXT: andw $-32768, {{.*}}(%rip) # encoding: [0x66,0x81,0x25,A,A,A,A,0x00,0x80]
|
||||
; CHECK-NEXT: # fixup A - offset: 3, value: g16-6, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: # imm = 0x8000
|
||||
; CHECK-NEXT: movw %ax, {{.*}}(%rip) # encoding: [0x66,0x89,0x05,A,A,A,A]
|
||||
; CHECK-NEXT: # fixup A - offset: 3, value: g16-4, kind: reloc_riprel_4byte
|
||||
; CHECK-NEXT: testw %ax, %ax # encoding: [0x66,0x85,0xc0]
|
||||
; CHECK-NEXT: jne .LBB38_2 # encoding: [0x75,A]
|
||||
; CHECK-NEXT: # fixup A - offset: 1, value: .LBB38_2-1, kind: FK_PCRel_1
|
||||
; CHECK-NEXT: # %bb.1: # %a
|
||||
|
@ -19,11 +19,10 @@ define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind {
|
||||
; JUMP1-LABEL: foo:
|
||||
; JUMP1: # %bb.0: # %entry
|
||||
; JUMP1-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
||||
; JUMP1-NEXT: sete %al
|
||||
; JUMP1-NEXT: cmpl $5, {{[0-9]+}}(%esp)
|
||||
; JUMP1-NEXT: setl %cl
|
||||
; JUMP1-NEXT: orb %al, %cl
|
||||
; JUMP1-NEXT: cmpb $1, %cl
|
||||
; JUMP1-NEXT: setne %al
|
||||
; JUMP1-NEXT: cmpl $4, {{[0-9]+}}(%esp)
|
||||
; JUMP1-NEXT: setg %cl
|
||||
; JUMP1-NEXT: testb %al, %cl
|
||||
; JUMP1-NEXT: jne .LBB0_1
|
||||
; JUMP1-NEXT: # %bb.2: # %cond_true
|
||||
; JUMP1-NEXT: jmp bar # TAILCALL
|
||||
@ -50,11 +49,10 @@ define void @unpredictable(i32 %X, i32 %Y, i32 %Z) nounwind {
|
||||
; JUMP2-LABEL: unpredictable:
|
||||
; JUMP2: # %bb.0: # %entry
|
||||
; JUMP2-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
||||
; JUMP2-NEXT: sete %al
|
||||
; JUMP2-NEXT: cmpl $5, {{[0-9]+}}(%esp)
|
||||
; JUMP2-NEXT: setl %cl
|
||||
; JUMP2-NEXT: orb %al, %cl
|
||||
; JUMP2-NEXT: cmpb $1, %cl
|
||||
; JUMP2-NEXT: setne %al
|
||||
; JUMP2-NEXT: cmpl $4, {{[0-9]+}}(%esp)
|
||||
; JUMP2-NEXT: setg %cl
|
||||
; JUMP2-NEXT: testb %al, %cl
|
||||
; JUMP2-NEXT: jne .LBB1_1
|
||||
; JUMP2-NEXT: # %bb.2: # %cond_true
|
||||
; JUMP2-NEXT: jmp bar # TAILCALL
|
||||
@ -64,11 +62,10 @@ define void @unpredictable(i32 %X, i32 %Y, i32 %Z) nounwind {
|
||||
; JUMP1-LABEL: unpredictable:
|
||||
; JUMP1: # %bb.0: # %entry
|
||||
; JUMP1-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
||||
; JUMP1-NEXT: sete %al
|
||||
; JUMP1-NEXT: cmpl $5, {{[0-9]+}}(%esp)
|
||||
; JUMP1-NEXT: setl %cl
|
||||
; JUMP1-NEXT: orb %al, %cl
|
||||
; JUMP1-NEXT: cmpb $1, %cl
|
||||
; JUMP1-NEXT: setne %al
|
||||
; JUMP1-NEXT: cmpl $4, {{[0-9]+}}(%esp)
|
||||
; JUMP1-NEXT: setg %cl
|
||||
; JUMP1-NEXT: testb %al, %cl
|
||||
; JUMP1-NEXT: jne .LBB1_1
|
||||
; JUMP1-NEXT: # %bb.2: # %cond_true
|
||||
; JUMP1-NEXT: jmp bar # TAILCALL
|
||||
|
Loading…
Reference in New Issue
Block a user