mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-01 09:18:30 +00:00
It turns out that when the ".with.overflow" intrinsics were added to the X86
backend, they were all implemented except umul. This one fell back to the
default implementation that did a hi/lo multiply and compared the top. Fix
this to check the overflow flag that the 'mul' instruction sets, so we can
avoid an explicit test. Now we compile:

  void *func(long count) {
    return new int[count];
  }

into:

  __Z4funcl:                              ## @_Z4funcl
    movl    $4, %ecx                ## encoding: [0xb9,0x04,0x00,0x00,0x00]
    movq    %rdi, %rax              ## encoding: [0x48,0x89,0xf8]
    mulq    %rcx                    ## encoding: [0x48,0xf7,0xe1]
    seto    %cl                     ## encoding: [0x0f,0x90,0xc1]
    testb   %cl, %cl                ## encoding: [0x84,0xc9]
    movq    $-1, %rdi               ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
    cmoveq  %rax, %rdi              ## encoding: [0x48,0x0f,0x44,0xf8]
    jmp     __Znam                  ## TAILCALL

instead of:

  __Z4funcl:                              ## @_Z4funcl
    movl    $4, %ecx                ## encoding: [0xb9,0x04,0x00,0x00,0x00]
    movq    %rdi, %rax              ## encoding: [0x48,0x89,0xf8]
    mulq    %rcx                    ## encoding: [0x48,0xf7,0xe1]
    testq   %rdx, %rdx              ## encoding: [0x48,0x85,0xd2]
    movq    $-1, %rdi               ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
    cmoveq  %rax, %rdi              ## encoding: [0x48,0x0f,0x44,0xf8]
    jmp     __Znam                  ## TAILCALL

Other than the silly seto+test, this is using the o bit directly, so it's
going in the right direction.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120935 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
777dd07394
commit
b20e0b1fdd
@ -1600,7 +1600,32 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
||||
return RetVal;
|
||||
break;
|
||||
}
|
||||
|
||||
case X86ISD::UMUL: {
|
||||
SDValue N0 = Node->getOperand(0);
|
||||
SDValue N1 = Node->getOperand(1);
|
||||
|
||||
unsigned LoReg, HiReg;
|
||||
switch (NVT.getSimpleVT().SimpleTy) {
|
||||
default: llvm_unreachable("Unsupported VT!");
|
||||
case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; Opc = X86::MUL8r; break;
|
||||
case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; Opc = X86::MUL16r; break;
|
||||
case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; Opc = X86::MUL32r; break;
|
||||
case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; Opc = X86::MUL64r; break;
|
||||
}
|
||||
|
||||
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
|
||||
N0, SDValue()).getValue(1);
|
||||
|
||||
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
|
||||
SDValue Ops[] = {N1, InFlag};
|
||||
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
|
||||
|
||||
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
|
||||
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
|
||||
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
case ISD::SMUL_LOHI:
|
||||
case ISD::UMUL_LOHI: {
|
||||
SDValue N0 = Node->getOperand(0);
|
||||
@ -1653,11 +1678,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
||||
CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
|
||||
array_lengthof(Ops));
|
||||
InFlag = SDValue(CNode, 1);
|
||||
|
||||
// Update the chain.
|
||||
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
|
||||
} else {
|
||||
InFlag =
|
||||
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
|
||||
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag);
|
||||
InFlag = SDValue(CNode, 0);
|
||||
}
|
||||
|
||||
// Prevent use of AH in a REX instruction by referencing AX instead.
|
||||
@ -1696,7 +1722,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
||||
ReplaceUses(SDValue(Node, 1), Result);
|
||||
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
|
||||
}
|
||||
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -948,6 +948,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
|
||||
setOperationAction(ISD::USUBO, MVT::i32, Custom);
|
||||
setOperationAction(ISD::SMULO, MVT::i32, Custom);
|
||||
setOperationAction(ISD::UMULO, MVT::i32, Custom);
|
||||
|
||||
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
|
||||
// handle type legalization for these operations here.
|
||||
@ -961,6 +962,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::SSUBO, MVT::i64, Custom);
|
||||
setOperationAction(ISD::USUBO, MVT::i64, Custom);
|
||||
setOperationAction(ISD::SMULO, MVT::i64, Custom);
|
||||
setOperationAction(ISD::UMULO, MVT::i64, Custom);
|
||||
}
|
||||
|
||||
if (!Subtarget->is64Bit()) {
|
||||
@ -7042,7 +7044,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||
return NewSetCC;
|
||||
}
|
||||
|
||||
// Look for "(setcc) == / != 1" to avoid unncessary setcc.
|
||||
// Look for "(setcc) == / != 1" to avoid unnecessary setcc.
|
||||
if (Op0.getOpcode() == X86ISD::SETCC &&
|
||||
Op1.getOpcode() == ISD::Constant &&
|
||||
(cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
|
||||
@ -8446,8 +8448,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue RHS = N->getOperand(1);
|
||||
unsigned BaseOp = 0;
|
||||
unsigned Cond = 0;
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
switch (Op.getOpcode()) {
|
||||
default: llvm_unreachable("Unknown ovf instruction!");
|
||||
case ISD::SADDO:
|
||||
@ -8486,19 +8487,29 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
|
||||
BaseOp = X86ISD::SMUL;
|
||||
Cond = X86::COND_O;
|
||||
break;
|
||||
case ISD::UMULO:
|
||||
BaseOp = X86ISD::UMUL;
|
||||
Cond = X86::COND_B;
|
||||
break;
|
||||
case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
|
||||
SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
|
||||
MVT::i32);
|
||||
SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
|
||||
|
||||
SDValue SetCC =
|
||||
DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
|
||||
DAG.getConstant(X86::COND_O, MVT::i32),
|
||||
SDValue(Sum.getNode(), 2));
|
||||
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
|
||||
return Sum;
|
||||
}
|
||||
}
|
||||
|
||||
// Also sets EFLAGS.
|
||||
SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
|
||||
SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS);
|
||||
SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
|
||||
|
||||
SDValue SetCC =
|
||||
DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1),
|
||||
DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1));
|
||||
DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
|
||||
DAG.getConstant(Cond, MVT::i32),
|
||||
SDValue(Sum.getNode(), 1));
|
||||
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
|
||||
return Sum;
|
||||
|
@ -200,9 +200,11 @@ namespace llvm {
|
||||
PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
|
||||
PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
|
||||
|
||||
// ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results.
|
||||
ADD, SUB, SMUL, UMUL,
|
||||
// ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
|
||||
ADD, SUB, SMUL,
|
||||
INC, DEC, OR, XOR, AND,
|
||||
|
||||
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
|
||||
|
||||
// MUL_IMM - X86 specific multiply by immediate.
|
||||
MUL_IMM,
|
||||
|
@ -60,11 +60,12 @@ def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
|
||||
|
||||
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
|
||||
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
|
||||
"mul{l}\t$src",
|
||||
[]>; // EAX,EDX = EAX*GR32
|
||||
"mul{l}\t$src", // EAX,EDX = EAX*GR32
|
||||
[/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>;
|
||||
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
|
||||
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
|
||||
"mul{q}\t$src", []>; // RAX,RDX = RAX*GR64
|
||||
"mul{q}\t$src", // RAX,RDX = RAX*GR64
|
||||
[/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
|
||||
|
||||
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
|
||||
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
|
||||
|
@ -35,6 +35,12 @@ def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
|
||||
[SDTCisSameAs<0, 2>,
|
||||
SDTCisSameAs<0, 3>,
|
||||
SDTCisInt<0>, SDTCisVT<1, i32>]>;
|
||||
// RES1, RES2, FLAGS = op LHS, RHS
|
||||
def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2,
|
||||
[SDTCisSameAs<0, 1>,
|
||||
SDTCisSameAs<0, 2>,
|
||||
SDTCisSameAs<0, 3>,
|
||||
SDTCisInt<0>, SDTCisVT<1, i32>]>;
|
||||
def SDTX86BrCond : SDTypeProfile<0, 3,
|
||||
[SDTCisVT<0, OtherVT>,
|
||||
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
|
||||
@ -188,7 +194,7 @@ def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
|
||||
def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
|
||||
def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
|
||||
[SDNPCommutative]>;
|
||||
def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,
|
||||
def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
|
||||
[SDNPCommutative]>;
|
||||
|
||||
def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
|
||||
|
@ -1,8 +1,14 @@
|
||||
; RUN: llc < %s -march=x86 | grep "\\\\\\\<mul"
|
||||
; RUN: llc < %s -march=x86 | FileCheck %s
|
||||
|
||||
declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
|
||||
define i1 @a(i32 %x) zeroext nounwind {
|
||||
%res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
|
||||
%obil = extractvalue {i32, i1} %res, 1
|
||||
ret i1 %obil
|
||||
|
||||
; CHECK: a:
|
||||
; CHECK: mull
|
||||
; CHECK: seto %al
|
||||
; CHECK: movzbl %al, %eax
|
||||
; CHECK: ret
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user