Make the fast-isel code for literal 0.0 a bit shorter/faster, since 0.0 is common. rdar://problem/9303592.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130338 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-07 15:52:21 +00:00 · 2011-04-27 22:41:55 +00:00 · 2011-04-27 22:41:55 +00:00 · 2790ba8e5a
commit 2790ba8e5a
parent 161b887506
4 changed files with 64 additions and 2 deletions
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@ -317,6 +317,10 @@ protected:
    return 0;
  }
  /// TargetMaterializeFloatZero - Target hook for emitting a floating-point
  /// zero constant into a register using target-specific logic.  The
  /// target-independent code invokes it for a ConstantFP for which isZero()
  /// holds.  Returns the register holding the value, or 0 if the target did
  /// not handle it; this default implementation handles nothing, in which case
  /// the caller goes on to try its other constant materialization strategies.
  virtual unsigned TargetMaterializeFloatZero(const ConstantFP* CF) {
    return 0;
  }
 private:
  bool SelectBinaryOp(const User *I, unsigned ISDOpcode);
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@ -164,8 +164,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
    Reg =
      getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
  } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
-    // Try to emit the constant directly.
+    if (CF->isZero()) {
-    Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+      Reg = TargetMaterializeFloatZero(CF);
    } else {
      // Try to emit the constant directly.
      Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
    }
    if (!Reg) {
      // Try to emit the constant by using an integer constant with a cast.
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@ -124,6 +124,8 @@ private:
  unsigned TargetMaterializeAlloca(const AllocaInst *C);
  unsigned TargetMaterializeFloatZero(const ConstantFP *CF);
  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
@ -2049,6 +2051,45 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
  return ResultReg;
 }
 unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
  MVT VT;
  if (!isTypeLegal(CF->getType(), VT))
    return false;
  // Get opcode and regclass for the given zero.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.SimpleTy) {
    default: return false;
    case MVT::f32:
      if (Subtarget->hasSSE1()) {
        Opc = X86::FsFLD0SS;
        RC  = X86::FR32RegisterClass;
      } else {
        Opc = X86::LD_Fp032;
        RC  = X86::RFP32RegisterClass;
      }
      break;
    case MVT::f64:
      if (Subtarget->hasSSE2()) {
        Opc = X86::FsFLD0SD;
        RC  = X86::FR64RegisterClass;
      } else {
        Opc = X86::LD_Fp064;
        RC  = X86::RFP64RegisterClass;
      }
      break;
    case MVT::f80:
      // No f80 support yet.
      return false;
  }
  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
  return ResultReg;
 }
 /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
 /// vreg is being provided by the specified load instruction.  If possible,
 /// try to fold the load as an operand to the instruction, returning true if
--- a/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@ -225,3 +225,16 @@ if.else:                                          ; preds = %entry
 ; CHECK-NEXT: je 
 }
 ; Check that a float 0.0 stored to memory is materialized using pxor.
 define void @test18(float* %p1) {
  store float 0.0, float* %p1
  ret void
 ; CHECK: test18:
 ; CHECK: pxor
 }
 ; Same as test18, but for a double 0.0: it must also be materialized using
 ; pxor.
 define void @test19(double* %p1) {
  store double 0.0, double* %p1
  ret void
 ; CHECK: test19:
 ; CHECK: pxor
 }