rename FpGETRESULT32 -> FpGET_ST0_32 etc. Add support for

isel'ing value preserving FP roundings from one fp stack reg to another into a noop, instead of stack traffic. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48093 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-02 08:46:23 +00:00 · 2008-03-09 07:05:32 +00:00 · 2008-03-09 07:05:32 +00:00 · 6fa2f9c636
commit 6fa2f9c636
parent ce6e492997
5 changed files with 62 additions and 46 deletions
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@ -214,7 +214,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {

    MachineInstr *PrevMI = 0;
    if (I != BB.begin())
-        PrevMI = prior(I);
+      PrevMI = prior(I);

    ++NumFP;  // Keep track of # of pseudo instrs
    DOUT << "\nFPInst:\t" << *MI;
@ -917,13 +917,13 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
  MachineInstr *MI = I;
  switch (MI->getOpcode()) {
  default: assert(0 && "Unknown SpecialFP instruction!");
-  case X86::FpGETRESULT32:  // Appears immediately after a call returning FP type!
-  case X86::FpGETRESULT64:  // Appears immediately after a call returning FP type!
-  case X86::FpGETRESULT80:
+  case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
+  case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type!
+  case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type!
    assert(StackTop == 0 && "Stack should be empty after a call!");
    pushReg(getFPReg(MI->getOperand(0)));
    break;
-  case X86::FpGETRESULT80x2:
+  case X86::FpGET_ST0_ST1:
    assert(StackTop == 0 && "Stack should be empty after a call!");
    pushReg(getFPReg(MI->getOperand(0)));
    pushReg(getFPReg(MI->getOperand(1)));
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@ -486,10 +486,15 @@ void X86DAGToDAGISel::PreprocessForFPConvert(SelectionDAG &DAG) {
    if (SrcIsSSE && DstIsSSE)
      continue;

-    // If this is an FPStack extension (but not a truncation), it is a noop.
-    if (!SrcIsSSE && !DstIsSSE && N->getOpcode() == ISD::FP_EXTEND)
-      continue;
-    
+    if (!SrcIsSSE && !DstIsSSE) {
+      // If this is an FPStack extension, it is a noop.
+      if (N->getOpcode() == ISD::FP_EXTEND)
+        continue;
+      // If this is a value-preserving FPStack truncation, it is a noop.
+      if (N->getConstantOperandVal(1))
+        continue;
+    }
+   
    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore.  SSE can fold direct loads into other
    // operations.  Based on this, decide what we want to do.
@ -1150,7 +1155,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
    case X86ISD::GlobalBaseReg: 
      return getGlobalBaseReg();

-    case X86ISD::FP_GET_RESULT2: {
+    case X86ISD::FP_GET_ST0_ST1: {
      SDOperand Chain = N.getOperand(0);
      SDOperand InFlag = N.getOperand(1);
      AddToISelQueue(Chain);
@ -1161,7 +1166,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      SDOperand Ops[] = { Chain, InFlag };
-      SDNode *ResNode = CurDAG->getTargetNode(X86::FpGETRESULT80x2, Tys,
+      SDNode *ResNode = CurDAG->getTargetNode(X86::FpGET_ST0_ST1, Tys,
                                              Ops, 2);
      Chain = SDOperand(ResNode, 2);
      InFlag = SDOperand(ResNode, 3);
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -917,9 +917,8 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
    if (isScalarFPTypeInSSEReg(GetResultTy))
      GetResultTy = MVT::f80;
    SDVTList Tys = DAG.getVTList(GetResultTy, MVT::Other, MVT::Flag);
-    
    SDOperand GROps[] = { Chain, InFlag };
-    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
+    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_ST0, Tys, GROps, 2);
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

@ -969,7 +968,7 @@ LowerCallResultToTwoX87Regs(SDOperand Chain, SDOperand InFlag,
  const MVT::ValueType VTs[] = { MVT::f80, MVT::f80, MVT::Other, MVT::Flag };
  SDVTList Tys = DAG.getVTList(VTs, 4);
  SDOperand Ops[] = { Chain, InFlag };
-  SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT2, Tys, Ops, 2);
+  SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_ST0_ST1, Tys, Ops, 2);
  Chain = RetVal.getValue(2);
  SDOperand FIN = TheCall->getOperand(5);
  Chain = DAG.getStore(Chain, RetVal.getValue(1), FIN, NULL, 0);
@ -5564,8 +5563,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
  case X86ISD::FLD:                return "X86ISD::FLD";
  case X86ISD::FST:                return "X86ISD::FST";
-  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
-  case X86ISD::FP_GET_RESULT2:     return "X86ISD::FP_GET_RESULT2";
+  case X86ISD::FP_GET_ST0:         return "X86ISD::FP_GET_ST0";
+  case X86ISD::FP_GET_ST0_ST1:     return "X86ISD::FP_GET_ST0_ST1";
  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
  case X86ISD::CALL:               return "X86ISD::CALL";
  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@ -84,14 +84,14 @@ namespace llvm {
      /// as.
      FST,

-      /// FP_GET_RESULT - This corresponds to FpGETRESULT pseudo instruction
+      /// FP_GET_ST0 - This corresponds to FpGET_ST0 pseudo instruction
      /// which copies from ST(0) to the destination. It takes a chain and
      /// writes a RFP result and a chain.
-      FP_GET_RESULT,
+      FP_GET_ST0,

-      /// FP_GET_RESULT2 - Same as FP_GET_RESULT except it copies two values
+      /// FP_GET_ST0_ST1 - Same as FP_GET_RESULT except it copies two values
      /// ST(0) and ST(1).
-      FP_GET_RESULT2,
+      FP_GET_ST0_ST1,

      /// FP_SET_RESULT - This corresponds to FpSETRESULT pseudo instruction
      /// which copies the source operand to ST(0). It takes a chain+value and
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@ -18,7 +18,8 @@
 //===----------------------------------------------------------------------===//

 def SDTX86FpGet     : SDTypeProfile<1, 0, [SDTCisFP<0>]>;
-def SDTX86FpGet2    : SDTypeProfile<2, 0, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
+def SDTX86FpGet2    : SDTypeProfile<2, 0, [SDTCisVT<0, f80>, 
+                                           SDTCisVT<1, f80>]>;
 def SDTX86FpSet     : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
 def SDTX86Fld       : SDTypeProfile<1, 2, [SDTCisFP<0>,
                                           SDTCisPtrTy<1>, 
@ -32,9 +33,7 @@ def SDTX86FpToIMem  : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;

 def SDTX86CwdStore  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;

-def X86fpget        : SDNode<"X86ISD::FP_GET_RESULT", SDTX86FpGet,
-                             [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
-def X86fpget2       : SDNode<"X86ISD::FP_GET_RESULT2", SDTX86FpGet2,
+def X86fpget_st0    : SDNode<"X86ISD::FP_GET_ST0", SDTX86FpGet,
                             [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
 def X86fpset        : SDNode<"X86ISD::FP_SET_RESULT", SDTX86FpSet,
                             [SDNPHasChain, SDNPOutFlag]>;
@ -139,17 +138,15 @@ let isTerminator = 1 in
 // encoding and asm printing info).

 // Pseudo Instructions for FP stack return values.
-def FpGETRESULT32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP,
-                      [(set RFP32:$dst, X86fpget)]>;           // FPR = ST(0)
+def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP,
+                        [(set RFP32:$dst, X86fpget_st0)]>;       // FPR = ST(0)
+def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP,
+                        [(set RFP64:$dst, X86fpget_st0)]>;       // FPR = ST(0)
+def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP,
+                        [(set RFP80:$dst, X86fpget_st0)]>;       // FPR = ST(0)

-def FpGETRESULT64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP,
-                      [(set RFP64:$dst, X86fpget)]>;           // FPR = ST(0)
-
-def FpGETRESULT80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP,
-                      [(set RFP80:$dst, X86fpget)]>;           // FPR = ST(0)
-
-def FpGETRESULT80x2 : FpI_<(outs RFP80:$dst1, RFP80:$dst2), (ins), SpecialFP,
-                      []>;                        // FPR = ST(0), FPR = ST(1)
+def FpGET_ST0_ST1 : FpI_<(outs RFP80:$dst1, RFP80:$dst2), (ins), SpecialFP,
+                         []>;                        // FPR = ST(0), FPR = ST(1)


 let Defs = [ST0] in {
@ -174,15 +171,15 @@ class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :

 // Register copies.  Just copies, the shortening ones do not truncate.
 let neverHasSideEffects = 1 in {
-def MOV_Fp3232       : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>; 
-def MOV_Fp3264       : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>; 
-def MOV_Fp6432       : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>; 
-def MOV_Fp6464       : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>; 
-def MOV_Fp8032       : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>; 
-def MOV_Fp3280       : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>; 
-def MOV_Fp8064       : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>; 
-def MOV_Fp6480       : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>; 
-def MOV_Fp8080       : FpI_<(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>; 
+  def MOV_Fp3232 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>; 
+  def MOV_Fp3264 : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>; 
+  def MOV_Fp6432 : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>; 
+  def MOV_Fp6464 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>; 
+  def MOV_Fp8032 : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>; 
+  def MOV_Fp3280 : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>; 
+  def MOV_Fp8064 : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>; 
+  def MOV_Fp6480 : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>; 
+  def MOV_Fp8080 : FpI_  <(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>; 
 }

 // Factoring for arithmetic.
@ -583,6 +580,21 @@ def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
 // Used to conv. i64 to f64 since there isn't a SSE version.
 def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;

-def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>, Requires<[FPStackf32]>;
-def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>, Requires<[FPStackf32]>;
-def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>, Requires<[FPStackf64]>;
+// FP extensions map onto simple pseudo-value conversions if they are to/from
+// the FP stack.
+def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>,
+          Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>,
+           Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>,
+           Requires<[FPStackf64]>;
+
+// FP truncations map onto simple pseudo-value conversions if they are to/from
+// the FP stack.  We have validated that only value-preserving truncations make
+// it through isel.
+def : Pat<(f32 (fround RFP64:$src)), (MOV_Fp6432 RFP64:$src)>,
+          Requires<[FPStackf32]>;
+def : Pat<(f32 (fround RFP80:$src)), (MOV_Fp8032 RFP80:$src)>,
+           Requires<[FPStackf32]>;
+def : Pat<(f64 (fround RFP80:$src)), (MOV_Fp8064 RFP80:$src)>,
+           Requires<[FPStackf64]>;