mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-16 08:08:01 +00:00
Add WIN_FTOL_* pseudo-instructions to model the unique calling convention
used by the Win32 _ftol2 runtime function. Patch by Joe Groff! llvm-svn: 151382
This commit is contained in:
parent
c077e0f945
commit
d2f0ce2674
@ -1644,6 +1644,32 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
|
||||
return;
|
||||
}
|
||||
|
||||
case X86::WIN_FTOL_32:
|
||||
case X86::WIN_FTOL_64: {
|
||||
MachineBasicBlock::iterator InsertPt = MI;
|
||||
|
||||
// Push the operand into ST0.
|
||||
MachineOperand &Op = MI->getOperand(0);
|
||||
assert(Op.isUse() && Op.isReg() &&
|
||||
Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6);
|
||||
unsigned FPReg = getFPReg(Op);
|
||||
if (Op.isKill())
|
||||
moveToTop(FPReg, I);
|
||||
else
|
||||
duplicateToTop(FPReg, FPReg, I);
|
||||
|
||||
// Emit the call. This will pop the operand.
|
||||
BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
|
||||
.addExternalSymbol("_ftol2")
|
||||
.addReg(X86::ST0, RegState::ImplicitKill)
|
||||
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
|
||||
.addReg(X86::EDX, RegState::Define | RegState::Implicit)
|
||||
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
|
||||
--StackTop;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case X86::RET:
|
||||
case X86::RETI:
|
||||
// If RET has an FP register use operand, pass the first one in ST(0) and
|
||||
|
@ -187,15 +187,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setLibcallName(RTLIB::SREM_I64, "_allrem");
|
||||
setLibcallName(RTLIB::UREM_I64, "_aullrem");
|
||||
setLibcallName(RTLIB::MUL_I64, "_allmul");
|
||||
setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
|
||||
setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
|
||||
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
|
||||
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
|
||||
setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
|
||||
setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
|
||||
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
|
||||
setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
|
||||
setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
|
||||
|
||||
// The _ftol2 runtime function has an unusual calling conv, which
|
||||
// is modeled by a special pseudo-instruction.
|
||||
setLibcallName(RTLIB::FPTOUINT_F64_I64, 0);
|
||||
setLibcallName(RTLIB::FPTOUINT_F32_I64, 0);
|
||||
setLibcallName(RTLIB::FPTOUINT_F64_I32, 0);
|
||||
setLibcallName(RTLIB::FPTOUINT_F32_I32, 0);
|
||||
}
|
||||
|
||||
if (Subtarget->isTargetDarwin()) {
|
||||
@ -315,6 +318,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
|
||||
}
|
||||
|
||||
if (isTargetFTOL()) {
|
||||
// Use the _ftol2 runtime function, which has a pseudo-instruction
|
||||
// to handle its weird calling convention.
|
||||
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
|
||||
}
|
||||
|
||||
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
|
||||
if (!X86ScalarSSEf64) {
|
||||
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
|
||||
@ -7708,14 +7717,14 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
|
||||
|
||||
EVT DstTy = Op.getValueType();
|
||||
|
||||
if (!IsSigned) {
|
||||
if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {
|
||||
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
|
||||
DstTy = MVT::i64;
|
||||
}
|
||||
|
||||
assert(DstTy.getSimpleVT() <= MVT::i64 &&
|
||||
DstTy.getSimpleVT() >= MVT::i16 &&
|
||||
"Unknown FP_TO_SINT to lower!");
|
||||
"Unknown FP_TO_INT to lower!");
|
||||
|
||||
// These are really Legal.
|
||||
if (DstTy == MVT::i32 &&
|
||||
@ -7726,26 +7735,29 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
|
||||
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
|
||||
return std::make_pair(SDValue(), SDValue());
|
||||
|
||||
// We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
|
||||
// stack slot.
|
||||
// We lower FP->int64 either into FISTP64 followed by a load from a temporary
|
||||
// stack slot, or into the FTOL runtime function.
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
unsigned MemSize = DstTy.getSizeInBits()/8;
|
||||
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
|
||||
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
|
||||
|
||||
|
||||
|
||||
unsigned Opc;
|
||||
switch (DstTy.getSimpleVT().SimpleTy) {
|
||||
default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
|
||||
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
|
||||
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
|
||||
case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
|
||||
}
|
||||
if (!IsSigned && isIntegerTypeFTOL(DstTy))
|
||||
Opc = X86ISD::WIN_FTOL;
|
||||
else
|
||||
switch (DstTy.getSimpleVT().SimpleTy) {
|
||||
default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
|
||||
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
|
||||
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
|
||||
case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
|
||||
}
|
||||
|
||||
SDValue Chain = DAG.getEntryNode();
|
||||
SDValue Value = Op.getOperand(0);
|
||||
EVT TheVT = Op.getOperand(0).getValueType();
|
||||
// FIXME This causes a redundant load/store if the SSE-class value is already
|
||||
// in memory, such as if it is on the callstack.
|
||||
if (isScalarFPTypeInSSEReg(TheVT)) {
|
||||
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
|
||||
Chain = DAG.getStore(Chain, DL, Value, StackSlot,
|
||||
@ -7770,12 +7782,23 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
|
||||
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
|
||||
MachineMemOperand::MOStore, MemSize, MemSize);
|
||||
|
||||
// Build the FP_TO_INT*_IN_MEM
|
||||
SDValue Ops[] = { Chain, Value, StackSlot };
|
||||
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
|
||||
Ops, 3, DstTy, MMO);
|
||||
|
||||
return std::make_pair(FIST, StackSlot);
|
||||
if (Opc != X86ISD::WIN_FTOL) {
|
||||
// Build the FP_TO_INT*_IN_MEM
|
||||
SDValue Ops[] = { Chain, Value, StackSlot };
|
||||
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
|
||||
Ops, 3, DstTy, MMO);
|
||||
return std::make_pair(FIST, StackSlot);
|
||||
} else {
|
||||
SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
|
||||
DAG.getVTList(MVT::Other, MVT::Glue),
|
||||
Chain, Value);
|
||||
SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX,
|
||||
MVT::i32, ftol.getValue(1));
|
||||
SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX,
|
||||
MVT::i32, eax.getValue(2));
|
||||
SDValue pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, eax, edx);
|
||||
return std::make_pair(pair, SDValue());
|
||||
}
|
||||
}
|
||||
|
||||
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
|
||||
@ -7788,10 +7811,14 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
|
||||
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
|
||||
if (FIST.getNode() == 0) return Op;
|
||||
|
||||
// Load the result.
|
||||
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
|
||||
FIST, StackSlot, MachinePointerInfo(),
|
||||
false, false, false, 0);
|
||||
if (StackSlot.getNode())
|
||||
// Load the result.
|
||||
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
|
||||
FIST, StackSlot, MachinePointerInfo(),
|
||||
false, false, false, 0);
|
||||
else
|
||||
// The node is the result.
|
||||
return FIST;
|
||||
}
|
||||
|
||||
SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
|
||||
@ -10837,16 +10864,25 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
||||
case ISD::SUBE:
|
||||
// We don't want to expand or promote these.
|
||||
return;
|
||||
case ISD::FP_TO_SINT: {
|
||||
case ISD::FP_TO_SINT:
|
||||
case ISD::FP_TO_UINT: {
|
||||
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
|
||||
|
||||
if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType()))
|
||||
return;
|
||||
|
||||
std::pair<SDValue,SDValue> Vals =
|
||||
FP_TO_INTHelper(SDValue(N, 0), DAG, true);
|
||||
FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned);
|
||||
SDValue FIST = Vals.first, StackSlot = Vals.second;
|
||||
if (FIST.getNode() != 0) {
|
||||
EVT VT = N->getValueType(0);
|
||||
// Return a load from the stack slot.
|
||||
Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
|
||||
MachinePointerInfo(),
|
||||
false, false, false, 0));
|
||||
if (StackSlot.getNode() != 0)
|
||||
Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
|
||||
MachinePointerInfo(),
|
||||
false, false, false, 0));
|
||||
else
|
||||
Results.push_back(FIST);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -11060,6 +11096,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
|
||||
case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
|
||||
case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
|
||||
case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -299,6 +299,9 @@ namespace llvm {
|
||||
// falls back to heap allocation if not.
|
||||
SEG_ALLOCA,
|
||||
|
||||
// WIN_FTOL - Windows's _ftol2 runtime routine to do fptoui.
|
||||
WIN_FTOL,
|
||||
|
||||
// Memory barrier
|
||||
MEMBARRIER,
|
||||
MFENCE,
|
||||
@ -611,6 +614,18 @@ namespace llvm {
|
||||
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
|
||||
}
|
||||
|
||||
/// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine
|
||||
/// for fptoui.
|
||||
bool isTargetFTOL() const {
|
||||
// Both conditions must hold: the subtarget reports Windows and is not
// 64-bit. Any other subtarget yields false.
return Subtarget->isTargetWindows() && !Subtarget->is64Bit();
|
||||
}
|
||||
|
||||
/// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be
|
||||
/// used for fptoui to the given type.
|
||||
bool isIntegerTypeFTOL(EVT VT) const {
|
||||
// Only MVT::i64 qualifies, and only when isTargetFTOL() is true; every
// other type (or target) yields false.
return isTargetFTOL() && VT == MVT::i64;
|
||||
}
|
||||
|
||||
/// createFastISel - This method returns a target specific FastISel object,
|
||||
/// or null if the target does not support "fast" ISel.
|
||||
virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
|
||||
|
@ -125,10 +125,26 @@ def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
|
||||
[(set GR64:$dst,
|
||||
(X86SegAlloca GR64:$size))]>,
|
||||
Requires<[In64BitMode]>;
|
||||
|
||||
}
|
||||
|
||||
// The MSVC runtime contains an _ftol2 routine for converting floating-point
|
||||
// to integer values. It has a strange calling convention: the input is
|
||||
// popped from the x87 stack, and the return value is given in EDX:EAX. No
|
||||
// other registers (aside from flags) are touched.
|
||||
// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80
|
||||
// variant is unnecessary.
|
||||
|
||||
let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in {
|
||||
def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src),
|
||||
"# win32 fptoui",
|
||||
[(X86WinFTOL RFP32:$src)]>,
|
||||
Requires<[In32BitMode]>;
|
||||
|
||||
def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src),
|
||||
"# win32 fptoui",
|
||||
[(X86WinFTOL RFP64:$src)]>,
|
||||
Requires<[In32BitMode]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// EH Pseudo Instructions
|
||||
|
@ -99,6 +99,8 @@ def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
|
||||
|
||||
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
|
||||
|
||||
def SDT_X86WIN_FTOL : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
|
||||
|
||||
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
|
||||
|
||||
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
|
||||
@ -238,6 +240,9 @@ def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
|
||||
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
|
||||
|
||||
def X86WinFTOL : SDNode<"X86ISD::WIN_FTOL", SDT_X86WIN_FTOL,
|
||||
[SDNPHasChain, SDNPOutGlue]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 Operand Definitions.
|
||||
//
|
||||
|
130
test/CodeGen/X86/win_ftol2.ll
Normal file
130
test/CodeGen/X86/win_ftol2.ll
Normal file
@ -0,0 +1,130 @@
|
||||
; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL
|
||||
; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
|
||||
; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT
|
||||
; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT
|
||||
; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
|
||||
; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=COMPILERRT
|
||||
; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL_2
|
||||
|
||||
; Win32 targets use the MSVCRT _ftol2 runtime function for fptoui to i64. This
|
||||
; function has a nonstandard calling convention: the input value is expected on
|
||||
; the x87 stack instead of the callstack. The input value is popped by the
|
||||
; callee. Mingw32 uses normal cdecl compiler-rt functions.
|
||||
|
||||
define i64 @double_ui64(double %x) nounwind {
|
||||
entry:
|
||||
; COMPILERRT: @double_ui64
|
||||
; COMPILERRT-NOT: calll __ftol2
|
||||
; FTOL: @double_ui64
|
||||
; FTOL: fldl
|
||||
; FTOL: calll __ftol2
|
||||
; FTOL-NOT: fstp
|
||||
%0 = fptoui double %x to i64
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @float_ui64(float %x) nounwind {
|
||||
entry:
|
||||
; COMPILERRT: @float_ui64
|
||||
; COMPILERRT-NOT: calll __ftol2
|
||||
; FTOL: @float_ui64
|
||||
; FTOL: flds
|
||||
; FTOL: calll __ftol2
|
||||
; FTOL-NOT: fstp
|
||||
%0 = fptoui float %x to i64
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @double_ui64_2(double %x, double %y, double %z) nounwind {
|
||||
; COMPILERRT: @double_ui64_2
|
||||
; FTOL: @double_ui64_2
|
||||
; FTOL_2: @double_ui64_2
|
||||
;; stack is empty
|
||||
; FTOL_2: fldl
|
||||
;; stack is %z
|
||||
; FTOL_2: fldl
|
||||
;; stack is %y %z
|
||||
; FTOL_2: fldl
|
||||
;; stack is %x %y %z
|
||||
; FTOL_2: fdiv %st(0), %st(1)
|
||||
;; stack is %x %1 %z
|
||||
; FTOL_2: fsubp %st(2)
|
||||
;; stack is %1 %2
|
||||
; FTOL_2: fxch
|
||||
; FTOL_2-NOT: fld
|
||||
; FTOL_2-NOT: fst
|
||||
;; stack is %2 %1
|
||||
; FTOL_2: calll __ftol2
|
||||
; FTOL_2-NOT: fxch
|
||||
; FTOL_2-NOT: fld
|
||||
; FTOL_2-NOT: fst
|
||||
; FTOL_2: calll __ftol2
|
||||
;; stack is empty
|
||||
|
||||
%1 = fdiv double %x, %y
|
||||
%2 = fsub double %x, %z
|
||||
%3 = fptoui double %1 to i64
|
||||
%4 = fptoui double %2 to i64
|
||||
%5 = sub i64 %3, %4
|
||||
ret i64 %5
|
||||
}
|
||||
|
||||
define i64 @double_ui64_3(double %x, double %y, double %z) nounwind {
|
||||
; COMPILERRT: @double_ui64_3
|
||||
; FTOL: @double_ui64_3
|
||||
; FTOL_2: @double_ui64_3
|
||||
;; stack is empty
|
||||
; FTOL_2: fldl
|
||||
;; stack is %z
|
||||
; FTOL_2: fldl
|
||||
;; stack is %y %z
|
||||
; FTOL_2: fldl
|
||||
;; stack is %x %y %z
|
||||
; FTOL_2: fdiv %st(0), %st(1)
|
||||
;; stack is %x %1 %z
|
||||
; FTOL_2: fsubp %st(2)
|
||||
;; stack is %1 %2
|
||||
; FTOL_2-NOT: fxch
|
||||
; FTOL_2-NOT: fld
|
||||
; FTOL_2-NOT: fst
|
||||
;; stack is %1 %2 (still)
|
||||
; FTOL_2: calll __ftol2
|
||||
; FTOL_2-NOT: fxch
|
||||
; FTOL_2-NOT: fld
|
||||
; FTOL_2-NOT: fst
|
||||
; FTOL_2: calll __ftol2
|
||||
;; stack is empty
|
||||
|
||||
%1 = fdiv double %x, %y
|
||||
%2 = fsub double %x, %z
|
||||
%3 = fptoui double %1 to i64
|
||||
%4 = fptoui double %2 to i64
|
||||
%5 = sub i64 %4, %3
|
||||
ret i64 %5
|
||||
}
|
||||
|
||||
define {double, i64} @double_ui64_4(double %x, double %y) nounwind {
|
||||
; COMPILERRT: @double_ui64_4
|
||||
; FTOL: @double_ui64_4
|
||||
; FTOL_2: @double_ui64_4
|
||||
;; stack is empty
|
||||
; FTOL_2: fldl
|
||||
;; stack is %y
|
||||
; FTOL_2: fldl
|
||||
;; stack is %x %y
|
||||
; FTOL_2: fxch
|
||||
;; stack is %y %x
|
||||
; FTOL_2: calll __ftol2
|
||||
;; stack is %x
|
||||
; FTOL_2: fld %st(0)
|
||||
;; stack is %x %x
|
||||
; FTOL_2: calll __ftol2
|
||||
;; stack is %x
|
||||
|
||||
%1 = fptoui double %x to i64
|
||||
%2 = fptoui double %y to i64
|
||||
%3 = sub i64 %1, %2
|
||||
%4 = insertvalue {double, i64} undef, double %x, 0
|
||||
%5 = insertvalue {double, i64} %4, i64 %3, 1
|
||||
ret {double, i64} %5
|
||||
}
|
Loading…
Reference in New Issue
Block a user