mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-24 12:19:53 +00:00
[PowerPC] Fold i1 extensions with other ops
Consider this function from our README.txt file:

  int foo(int a, int b) { return (a < b) << 4; }

We now explicitly track CR bits by default, so the comment in the README.txt
about not really having a SETCC is no longer accurate, but we did generate this
somewhat silly code:

  cmpw 0, 3, 4
  li 3, 0
  li 12, 1
  isel 3, 12, 3, 0
  sldi 3, 3, 4
  blr

which generates the zext as a select between 0 and 1, and then shifts the
result by a constant amount. Here we preprocess the DAG in order to fold the
results of operations on an extension of an i1 value into the SELECT_I[48]
pseudo instruction when the resulting constant can be materialized using one
instruction (just like the 0 and 1). This was not implemented as a DAGCombine
because the resulting code would have been anti-canonical and depends on
replacing chained user nodes, which does not fit well into the lowering
paradigm. Now we generate:

  cmpw 0, 3, 4
  li 3, 0
  li 12, 16
  isel 3, 12, 3, 0
  blr

which is less silly.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225203 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
dd0552884b
commit
ccc83e4a08
@ -217,6 +217,7 @@ private:
|
||||
void PeepholeCROps();
|
||||
|
||||
SDValue combineToCMPB(SDNode *N);
|
||||
// Folds a single-use extension of an i1 value into its two-operand user(s)
// when both outcomes constant-fold to values representable in 16 bits. On
// success Res receives the replacement select and N is advanced to the last
// user folded; otherwise both are left unchanged.
void foldBoolExts(SDValue &Res, SDNode *&N);
|
||||
|
||||
bool AllUsersSelectZero(SDNode *N);
|
||||
void SwapAllSelectUsers(SDNode *N);
|
||||
@ -3173,6 +3174,73 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
|
||||
return Res;
|
||||
}
|
||||
|
||||
// When CR bit registers are enabled, an extension of an i1 variable to a i32
|
||||
// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
|
||||
// involves constant materialization of a 0 or a 1 or both. If the result of
|
||||
// the extension is then operated upon by some operator that can be constant
|
||||
// folded with a constant 0 or 1, and that constant can be materialized using
|
||||
// only one instruction (like a zero or one), then we should fold in those
|
||||
// operations with the select.
|
||||
void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
|
||||
if (!PPCSubTarget->useCRBits())
|
||||
return;
|
||||
|
||||
if (N->getOpcode() != ISD::ZERO_EXTEND &&
|
||||
N->getOpcode() != ISD::SIGN_EXTEND &&
|
||||
N->getOpcode() != ISD::ANY_EXTEND)
|
||||
return;
|
||||
|
||||
if (N->getOperand(0).getValueType() != MVT::i1)
|
||||
return;
|
||||
|
||||
if (!N->hasOneUse())
|
||||
return;
|
||||
|
||||
SDLoc dl(N);
|
||||
EVT VT = N->getValueType(0);
|
||||
SDValue Cond = N->getOperand(0);
|
||||
SDValue ConstTrue =
|
||||
CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, VT);
|
||||
SDValue ConstFalse = CurDAG->getConstant(0, VT);
|
||||
|
||||
do {
|
||||
SDNode *User = *N->use_begin();
|
||||
if (User->getNumOperands() != 2)
|
||||
break;
|
||||
|
||||
auto TryFold = [this, N, User](SDValue Val) {
|
||||
SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
|
||||
SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
|
||||
SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
|
||||
|
||||
return CurDAG->FoldConstantArithmetic(User->getOpcode(),
|
||||
User->getValueType(0),
|
||||
O0.getNode(), O1.getNode());
|
||||
};
|
||||
|
||||
SDValue TrueRes = TryFold(ConstTrue);
|
||||
if (!TrueRes)
|
||||
break;
|
||||
SDValue FalseRes = TryFold(ConstFalse);
|
||||
if (!FalseRes)
|
||||
break;
|
||||
|
||||
// For us to materialize these using one instruction, we must be able to
|
||||
// represent them as signed 16-bit integers.
|
||||
uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
|
||||
False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
|
||||
if (!isInt<16>(True) || !isInt<16>(False))
|
||||
break;
|
||||
|
||||
// We can replace User with a new SELECT node, and try again to see if we
|
||||
// can fold the select with its user.
|
||||
Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
|
||||
N = User;
|
||||
ConstTrue = TrueRes;
|
||||
ConstFalse = FalseRes;
|
||||
} while (N->hasOneUse());
|
||||
}
|
||||
|
||||
void PPCDAGToDAGISel::PreprocessISelDAG() {
|
||||
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
|
||||
++Position;
|
||||
@ -3191,6 +3259,9 @@ void PPCDAGToDAGISel::PreprocessISelDAG() {
|
||||
break;
|
||||
}
|
||||
|
||||
if (!Res)
|
||||
foldBoolExts(Res, N);
|
||||
|
||||
if (Res) {
|
||||
DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
|
||||
DEBUG(N->dump(CurDAG));
|
||||
|
@ -252,23 +252,6 @@ anything though, because the compares still wouldn't be shared.
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
We should custom expand setcc instead of pretending that we have it. That
|
||||
would allow us to expose the access of the crbit after the mfcr, allowing
|
||||
that access to be trivially folded into other ops. A simple example:
|
||||
|
||||
int foo(int a, int b) { return (a < b) << 4; }
|
||||
|
||||
compiles into:
|
||||
|
||||
_foo:
|
||||
cmpw cr7, r3, r4
|
||||
mfcr r2, 1
|
||||
rlwinm r2, r2, 29, 31, 31
|
||||
slwi r3, r2, 4
|
||||
blr
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
Fold add and sub with constant into non-extern, non-weak addresses so this:
|
||||
|
||||
static int a;
|
||||
|
54
test/CodeGen/PowerPC/i1-ext-fold.ll
Normal file
54
test/CodeGen/PowerPC/i1-ext-fold.ll
Normal file
@ -0,0 +1,54 @@
|
||||
; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
; zext of an (icmp slt) i1 shifted left by 4. The checks below expect the
; constants 0 and 16 to be loaded with li and fed to a single isel.
define signext i32 @foo(i32 signext %a, i32 signext %b) #0 {
|
||||
entry:
|
||||
%cmp = icmp slt i32 %a, %b
|
||||
%conv = zext i1 %cmp to i32
|
||||
%shl = shl nuw nsw i32 %conv, 4
|
||||
ret i32 %shl
|
||||
|
||||
; CHECK-LABEL: @foo
|
||||
; CHECK-DAG: cmpw
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 0
|
||||
; CHECK-DAG: li [[REG2:[0-9]+]], 16
|
||||
; CHECK: isel 3, [[REG2]], [[REG1]],
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
; Two chained users of the zext: a shift left by 4, then an `or` with 5 (the
; value is named %add1 but the operation is an or). The checks below expect
; the constants 5 and 21 to be loaded with li and fed to a single isel.
define signext i32 @foo2(i32 signext %a, i32 signext %b) #0 {
|
||||
entry:
|
||||
%cmp = icmp slt i32 %a, %b
|
||||
%conv = zext i1 %cmp to i32
|
||||
%shl = shl nuw nsw i32 %conv, 4
|
||||
%add1 = or i32 %shl, 5
|
||||
ret i32 %add1
|
||||
|
||||
; CHECK-LABEL: @foo2
|
||||
; CHECK-DAG: cmpw
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 5
|
||||
; CHECK-DAG: li [[REG2:[0-9]+]], 21
|
||||
; CHECK: isel 3, [[REG2]], [[REG1]],
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
; Same shape as @foo but with an sle compare. The checks below expect only
; the constant 16 to be loaded, with 0 written directly as the first isel
; source operand.
; NOTE(review): presumably sle is tested through the inverted CR bit, which
; would explain the swapped operand order -- confirm.
define signext i32 @foo3(i32 signext %a, i32 signext %b) #0 {
|
||||
entry:
|
||||
%cmp = icmp sle i32 %a, %b
|
||||
%conv = zext i1 %cmp to i32
|
||||
%shl = shl nuw nsw i32 %conv, 4
|
||||
ret i32 %shl
|
||||
|
||||
; CHECK-LABEL: @foo3
|
||||
; CHECK-DAG: cmpw
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 16
|
||||
; CHECK: isel 3, 0, [[REG1]],
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
Loading…
Reference in New Issue
Block a user