[AArch64] Improve codegen for inverted overflow checking intrinsics
E.g. if we have a (xor (overflow-bit), 1), where the overflow bit comes from an
intrinsic like llvm.sadd.with.overflow, then we can kill the xor and use the
inverted condition code for the CSEL.

rdar://28495949

Reviewed By: kristof.beyls

Differential Revision: https://reviews.llvm.org/D38160

llvm-svn: 315205
This commit is contained in: commit 7210057227 (parent ab6a4185c7)
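For context, the pattern being targeted is easiest to see from a C caller that negates the overflow flag of an overflow builtin. The snippet below is an editorial illustration (the function name is made up, and the assembly sketches in the comments assume clang's usual lowering of __builtin_add_overflow to llvm.sadd.with.overflow plus an xor of the i1 result; register numbers are only illustrative):

#include <cstdint>

// Asks "did the signed add NOT overflow?".  The negation of the overflow
// flag is what produces the (xor (overflow-bit), 1) pattern that this
// patch folds into the CSEL during AArch64 lowering.
bool no_signed_overflow(int32_t a, int32_t b) {
  int32_t sum;
  return !__builtin_add_overflow(a, b, &sum);
}

// AArch64 codegen sketch (illustrative registers):
//   before:  cmn w0, w1 ; cset w8, vs ; eor w0, w8, #0x1
//   after:   cmn w0, w1 ; cset w0, vc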
@@ -1972,10 +1972,41 @@ SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
   return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
 }
 
+// Returns true if the given Op is the overflow flag result of an overflow
+// intrinsic operation.
+static bool isOverflowIntrOpRes(SDValue Op) {
+  unsigned Opc = Op.getOpcode();
+  return (Op.getResNo() == 1 &&
+          (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+           Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
+}
+
 static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
   SDValue Sel = Op.getOperand(0);
   SDValue Other = Op.getOperand(1);
+  SDLoc dl(Sel);
 
+  // If the operand is an overflow checking operation, invert the condition
+  // code and kill the Not operation. I.e., transform:
+  // (xor (overflow_op_bool, 1))
+  //   -->
+  // (csel 1, 0, invert(cc), overflow_op_bool)
+  // ... which later gets transformed to just a cset instruction with an
+  // inverted condition code, rather than a cset + eor sequence.
+  if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
+    // Only lower legal XALUO ops.
+    if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
+      return SDValue();
+
+    SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+    SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
+    AArch64CC::CondCode CC;
+    SDValue Value, Overflow;
+    std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
+    SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
+    return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
+                       CCVal, Overflow);
+  }
   // If neither operand is a SELECT_CC, give up.
   if (Sel.getOpcode() != ISD::SELECT_CC)
     std::swap(Sel, Other);
@@ -1994,7 +2025,6 @@ static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
   SDValue RHS = Sel.getOperand(1);
   SDValue TVal = Sel.getOperand(2);
   SDValue FVal = Sel.getOperand(3);
-  SDLoc dl(Sel);
 
   // FIXME: This could be generalized to non-integer comparisons.
   if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
@@ -3958,10 +3988,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
 
   // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
   // instruction.
-  unsigned Opc = LHS.getOpcode();
-  if (LHS.getResNo() == 1 && isOneConstant(RHS) &&
-      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
-       Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+  if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS)) {
     assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
            "Unexpected condition code.");
     // Only lower legal XALUO ops.
@@ -4453,12 +4480,9 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
   SDValue FVal = Op->getOperand(2);
   SDLoc DL(Op);
 
-  unsigned Opc = CCVal.getOpcode();
   // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
   // instruction.
-  if (CCVal.getResNo() == 1 &&
-      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
-       Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+  if (isOverflowIntrOpRes(CCVal)) {
     // Only lower legal XALUO ops.
     if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
      return SDValue();
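The hunks above change the AArch64 lowering code; the hunks below extend the overflow-intrinsic CodeGen tests. The conditions checked by the new tests (vc after saddo/ssubo, lo after uaddo, hs after usubo) come from inverting the overflow condition, and the AArch64 encoding pairs each condition code with its logical complement in the low bit. A minimal standalone sketch of that inversion follows; it is an editorial illustration based on the architectural encoding, not the in-tree getInvertedCondCode helper (which is assumed to behave equivalently):

#include <cassert>
#include <cstdio>

// AArch64 condition codes in their architectural encoding; complementary
// pairs (EQ/NE, HS/LO, MI/PL, VS/VC, HI/LS, GE/LT, GT/LE) differ in bit 0.
enum CondCode { EQ = 0x0, NE, HS, LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV };

// Invert a condition code by flipping the low bit (AL/NV excluded).
static CondCode invert(CondCode CC) {
  assert(CC != AL && CC != NV && "AL/NV have no inverse");
  return static_cast<CondCode>(CC ^ 0x1);
}

int main() {
  // saddo/ssubo signal overflow with VS, so the negated check uses VC;
  // uaddo signals overflow with HS (carry set), negated to LO; usubo
  // signals overflow with LO (borrow), negated to HS -- matching the
  // cset conditions in the tests below.
  std::printf("VS -> %s\n", invert(VS) == VC ? "VC" : "?");
  std::printf("HS -> %s\n", invert(HS) == LO ? "LO" : "?");
  std::printf("LO -> %s\n", invert(LO) == HS ? "HS" : "?");
  return 0;
}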
@@ -282,6 +282,17 @@ entry:
   ret i32 %ret
 }
 
+define i1 @saddo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: saddo.not.i32
+; CHECK: cmn w0, w1
+; CHECK-NEXT: cset w0, vc
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
 define i64 @saddo.select.i64(i64 %v1, i64 %v2) {
 entry:
 ; CHECK-LABEL: saddo.select.i64
@@ -293,6 +304,17 @@ entry:
   ret i64 %ret
 }
 
+define i1 @saddo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: saddo.not.i64
+; CHECK: cmn x0, x1
+; CHECK-NEXT: cset w0, vc
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
 define i32 @uaddo.select.i32(i32 %v1, i32 %v2) {
 entry:
 ; CHECK-LABEL: uaddo.select.i32
@@ -304,6 +326,17 @@ entry:
   ret i32 %ret
 }
 
+define i1 @uaddo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: uaddo.not.i32
+; CHECK: cmn w0, w1
+; CHECK-NEXT: cset w0, lo
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
 define i64 @uaddo.select.i64(i64 %v1, i64 %v2) {
 entry:
 ; CHECK-LABEL: uaddo.select.i64
@@ -315,6 +348,17 @@ entry:
   ret i64 %ret
 }
 
+define i1 @uaddo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: uaddo.not.i64
+; CHECK: cmn x0, x1
+; CHECK-NEXT: cset w0, lo
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
 define i32 @ssubo.select.i32(i32 %v1, i32 %v2) {
 entry:
 ; CHECK-LABEL: ssubo.select.i32
@@ -326,6 +370,17 @@ entry:
   ret i32 %ret
 }
 
+define i1 @ssubo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: ssubo.not.i32
+; CHECK: cmp w0, w1
+; CHECK-NEXT: cset w0, vc
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
 define i64 @ssubo.select.i64(i64 %v1, i64 %v2) {
 entry:
 ; CHECK-LABEL: ssubo.select.i64
@@ -337,6 +392,17 @@ entry:
   ret i64 %ret
 }
 
+define i1 @ssub.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: ssub.not.i64
+; CHECK: cmp x0, x1
+; CHECK-NEXT: cset w0, vc
+  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
 define i32 @usubo.select.i32(i32 %v1, i32 %v2) {
 entry:
 ; CHECK-LABEL: usubo.select.i32
@@ -348,6 +414,17 @@ entry:
   ret i32 %ret
 }
 
+define i1 @usubo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: usubo.not.i32
+; CHECK: cmp w0, w1
+; CHECK-NEXT: cset w0, hs
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
 define i64 @usubo.select.i64(i64 %v1, i64 %v2) {
 entry:
 ; CHECK-LABEL: usubo.select.i64
@@ -359,6 +436,17 @@ entry:
   ret i64 %ret
 }
 
+define i1 @usubo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: usubo.not.i64
+; CHECK: cmp x0, x1
+; CHECK-NEXT: cset w0, hs
+  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
 define i32 @smulo.select.i32(i32 %v1, i32 %v2) {
 entry:
 ; CHECK-LABEL: smulo.select.i32
@@ -372,6 +460,19 @@ entry:
   ret i32 %ret
 }
 
+define i1 @smulo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: smulo.not.i32
+; CHECK: smull x[[MREG:[0-9]+]], w0, w1
+; CHECK-NEXT: lsr x[[SREG:[0-9]+]], x[[MREG]], #32
+; CHECK-NEXT: cmp w[[SREG]], w[[MREG]], asr #31
+; CHECK-NEXT: cset w0, eq
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
 define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
 entry:
 ; CHECK-LABEL: smulo.select.i64
@@ -385,6 +486,19 @@ entry:
   ret i64 %ret
 }
 
+define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: smulo.not.i64
+; CHECK: mul [[MREG:x[0-9]+]], x0, x1
+; CHECK-NEXT: smulh [[HREG:x[0-9]+]], x0, x1
+; CHECK-NEXT: cmp [[HREG]], [[MREG]], asr #63
+; CHECK-NEXT: cset w0, eq
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
 define i32 @umulo.select.i32(i32 %v1, i32 %v2) {
 entry:
 ; CHECK-LABEL: umulo.select.i32
@@ -397,6 +511,18 @@ entry:
   ret i32 %ret
 }
 
+define i1 @umulo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: umulo.not.i32
+; CHECK: umull [[MREG:x[0-9]+]], w0, w1
+; CHECK-NEXT: cmp xzr, [[MREG]], lsr #32
+; CHECK-NEXT: cset w0, eq
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
 define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
 entry:
 ; CHECK-LABEL: umulo.select.i64
@@ -409,6 +535,18 @@ entry:
   ret i64 %ret
 }
 
+define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: umulo.not.i64
+; CHECK: umulh [[MREG:x[0-9]+]], x0, x1
+; CHECK-NEXT: cmp xzr, [[MREG]]
+; CHECK-NEXT: cset w0, eq
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
 
 ;
 ; Check the use of the overflow bit in combination with a branch instruction.