[SPARC] Fixes for hardware errata on LEON processor.

Passes to fix three hardware errata that appear on some LEON processor variants. The instructions FSMULD, FMULS and FDIVS do not work as expected on some LEON processors. This change allows those instructions to be replaced with alternative instruction sequences that are known to work. These passes only run when selected individually, or as part of a processor definition. They are not included in general SPARC processor compilations for non-LEON processors or for those LEON processors that do not have these hardware errata. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273108 91177308-0d34-0410-b5e6-96231b3b80d8
2025-03-04 00:29:28 +00:00 · 2016-06-19 11:03:28 +00:00 · 2016-06-19 11:03:28 +00:00 · 2ac3a338b2
commit 2ac3a338b2
parent 099f7f2187
12 changed files with 511 additions and 1 deletions
--- a/lib/Target/Sparc/LeonFeatures.td
+++ b/lib/Target/Sparc/LeonFeatures.td
@ -43,3 +43,24 @@ def InsertNOPLoad: SubtargetFeature<
  "true",
  "LEON3 erratum fix: Insert a NOP instruction after every single-cycle load instruction when the next instruction is another load/store instruction" 
 >;
 // Subtarget feature selected with the name "fixfsmuld"; when enabled it sets
 // the FixFSMULD subtarget attribute to "true".
 def FixFSMULD : SubtargetFeature<
  "fixfsmuld",
  "FixFSMULD",
  "true",
  "LEON erratum fix: Do not use FSMULD" 
 >;
 // Subtarget feature selected with the name "replacefmuls"; when enabled it sets
 // the ReplaceFMULS subtarget attribute to "true".
 def ReplaceFMULS : SubtargetFeature<
  "replacefmuls",
  "ReplaceFMULS",
  "true",
  "LEON erratum fix: Replace FMULS instruction with FMULD and relevant conversion instructions" 
 >;
 // Subtarget feature selected with the name "fixallfdivsqrt"; when enabled it
 // sets the FixAllFDIVSQRT subtarget attribute to "true".
 def FixAllFDIVSQRT : SubtargetFeature<
  "fixallfdivsqrt",
  "FixAllFDIVSQRT",
  "true",
  "LEON erratum fix: Fix FDIVS/FDIVD/FSQRTS/FSQRTD instructions with NOPs and floating-point store" 
 >;
--- a/lib/Target/Sparc/LeonPasses.cpp
+++ b/lib/Target/Sparc/LeonPasses.cpp
@ -30,6 +30,51 @@ LEONMachineFunctionPass::LEONMachineFunctionPass(char& ID) :
 {
 }
 // Returns the register number held by operand OperandIndex of MI.
 // OperandIndex may be LAST_OPERAND to select the final operand.
 // When MI has no operands, the index is out of range, or the operand is not a
 // register, a negative sentinel is returned instead. The sentinel is different
 // on every call so that two "not found" results never compare equal.
 int LEONMachineFunctionPass::GetRegIndexForOperand(MachineInstr& MI, int OperandIndex)
 {
  const unsigned OperandCount = MI.getNumOperands();
  if (OperandCount != 0) {
    const int ResolvedIndex = (OperandIndex == LAST_OPERAND)
                                  ? static_cast<int>(OperandCount - 1)
                                  : OperandIndex;
    // A negative index wraps to a huge unsigned value and fails this bound check.
    if (static_cast<unsigned>(ResolvedIndex) < OperandCount) {
      const MachineOperand &Operand = MI.getOperand(ResolvedIndex);
      if (Operand.isReg())
        return static_cast<int>(Operand.getReg());
    }
  }

  // Shared across all calls: step the sentinel down by ten each time.
  static int NotFoundIndex = -10;
  NotFoundIndex -= 10;
  return NotFoundIndex;
 }
 // Forgets every register previously recorded with markRegisterUsed().
 void LEONMachineFunctionPass::clearUsedRegisterList()
 {
  UsedRegisters.clear();
 }
 // Records registerIndex in UsedRegisters so that getUnusedFPRegister() will
 // not hand it out. No de-duplication is performed.
 void LEONMachineFunctionPass::markRegisterUsed(int registerIndex)
 {
  UsedRegisters.push_back(registerIndex);
 }
 // Finds a free floating-point register.
 // Scans SP::F0 through SP::F31 in order and returns the first register that
 // MRI does not report as used and that is not present in the UsedRegisters
 // vector. Returns -1 when every candidate is taken.
 int LEONMachineFunctionPass::getUnusedFPRegister(MachineRegisterInfo& MRI)
 {
  for (int Candidate = SP::F0; Candidate <= SP::F31; ++Candidate) {
    if (MRI.isPhysRegUsed(Candidate))
      continue;
    const bool AlreadyReserved =
        std::find(UsedRegisters.begin(), UsedRegisters.end(), Candidate) != UsedRegisters.end();
    if (!AlreadyReserved)
      return Candidate;
  }
  return -1;
 }
 //*****************************************************************************
 //**** InsertNOPLoad pass
 //*****************************************************************************
@ -76,3 +121,293 @@ bool InsertNOPLoad::runOnMachineFunction(MachineFunction& MF)
  return Modified;
 }
 //*****************************************************************************
 //**** FixFSMULD pass
 //*****************************************************************************
 //this pass should convert the FSMULD operands to double precision in scratch registers,
 //then calculate the result with the FMULD instruction. Therefore, the pass should replace operations of the form:
 //fsmuld %f20,%f21,%f8
 //with the sequence:
 //fstod %f20,%f0
 //fstod %f21,%f2
 //fmuld %f0,%f2,%f8
 //
 // Pass identification object; its address is handed to the base class below.
 char FixFSMULD::ID = 0;
 // Forwards the target machine and the pass ID to LEONMachineFunctionPass.
 FixFSMULD::FixFSMULD(TargetMachine &tm) :
                    LEONMachineFunctionPass(tm, ID)
 {
 }
 // Rewrites every FSMULD found in MF into
 //   fstod rs1, scratch1
 //   fstod rs2, scratch2
 //   fmuld scratch1, scratch2, rd
 // An FSMULD is recognised either as a real SP::FSMULD machine instruction or
 // as an inline-asm statement whose (lower-cased) string starts with "fsmuld".
 // An occurrence is left untouched when two free scratch registers cannot be
 // obtained from getUnusedFPRegister().
 // Returns true if at least one instruction was replaced.
 //
 // NOTE(review): getUnusedFPRegister() scans SP::F0..SP::F31, yet the scratch
 // registers are used as FSTOD results / FMULD inputs here - confirm that these
 // are valid registers for double-precision operands.
 bool FixFSMULD::runOnMachineFunction(MachineFunction& MF)
 {
  Subtarget = &MF.getSubtarget<SparcSubtarget>();
  const TargetInstrInfo& TII = *Subtarget->getInstrInfo();
  DebugLoc DL = DebugLoc();
  const int UNASSIGNED_INDEX = -1;
  const std::string FSMULDMnemonic("fsmuld");

  bool Modified = false;
  for (MachineBasicBlock &MBB : MF) {
    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
      // Advance before touching the instruction: it may be erased below, and
      // the instruction that follows it must still be visited next. (Advancing
      // again after an erase would skip that instruction, or step past end().)
      const MachineBasicBlock::iterator CurI = MBBI++;
      MachineInstr &MI = *CurI;

      int Reg1Index = UNASSIGNED_INDEX; // first source  (rs1)
      int Reg2Index = UNASSIGNED_INDEX; // second source (rs2)
      int Reg3Index = UNASSIGNED_INDEX; // destination   (rd)
      if (MI.getOpcode() == SP::FSMULD && MI.getNumOperands() == 3) {
        // Machine operands are ordered (outs rd), (ins rs1, rs2): the
        // destination comes first, unlike the "fsmuld rs1, rs2, rd" syntax.
        Reg3Index = MI.getOperand(0).getReg();
        Reg1Index = MI.getOperand(1).getReg();
        Reg2Index = MI.getOperand(2).getReg();
      }
      else if (MI.isInlineAsm()) {
        std::string AsmString(MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName());
        // Go through unsigned char so that tolower never sees a negative value.
        std::transform(AsmString.begin(), AsmString.end(), AsmString.begin(),
                       [](unsigned char C) { return static_cast<char>(::tolower(C)); });
        if (AsmString.compare(0, FSMULDMnemonic.size(), FSMULDMnemonic) == 0) {
          // The first three register operands of the inline-asm statement are
          // taken, in order, as rs1, rs2 and rd.
          // NOTE(review): this assumes the asm operands appear in the same
          // order as in the assembly text - confirm against callers.
          for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands();
               i != e && Reg3Index == UNASSIGNED_INDEX; ++i) {
            const MachineOperand &MO = MI.getOperand(i);
            if (!MO.isReg())
              continue;
            if (Reg1Index == UNASSIGNED_INDEX) Reg1Index = MO.getReg();
            else if (Reg2Index == UNASSIGNED_INDEX) Reg2Index = MO.getReg();
            else Reg3Index = MO.getReg();
          }
        }
      }

      if (Reg1Index == UNASSIGNED_INDEX || Reg2Index == UNASSIGNED_INDEX ||
          Reg3Index == UNASSIGNED_INDEX)
        continue;

      clearUsedRegisterList();
      // The destination may not have been recorded as used yet, so reserve it
      // explicitly before picking scratch registers.
      markRegisterUsed(Reg3Index);
      const int ScratchReg1Index = getUnusedFPRegister(MF.getRegInfo());
      if (ScratchReg1Index == UNASSIGNED_INDEX)
        continue; // no free scratch register: leave the instruction as it is
      markRegisterUsed(ScratchReg1Index);
      const int ScratchReg2Index = getUnusedFPRegister(MF.getRegInfo());
      if (ScratchReg2Index == UNASSIGNED_INDEX)
        continue;
      markRegisterUsed(ScratchReg2Index);

      // The replacement sequence is inserted in front of the original
      // instruction; each result register is added as a definition.
      //create fstod %f20,%f0
      BuildMI(MBB, CurI, DL, TII.get(SP::FSTOD), ScratchReg1Index)
          .addReg(Reg1Index);
      //create fstod %f21,%f2
      BuildMI(MBB, CurI, DL, TII.get(SP::FSTOD), ScratchReg2Index)
          .addReg(Reg2Index);
      //create fmuld %f0,%f2,%f8
      BuildMI(MBB, CurI, DL, TII.get(SP::FMULD), Reg3Index)
          .addReg(ScratchReg1Index)
          .addReg(ScratchReg2Index);

      MI.eraseFromParent();
      Modified = true;
    }
  }
  return Modified;
 }
 //*****************************************************************************
 //**** ReplaceFMULS pass
 //*****************************************************************************
 //This pass converts the FMULS operands to double precision in scratch registers,
 //then calculates the result with the FMULD instruction.
 //The pass should replace operations of the form:
 //fmuls %f20,%f21,%f8
 //with the sequence:
 //fstod %f20,%f0
 //fstod %f21,%f2
 //fmuld %f0,%f2,%f8
 //
 // Pass identification object; its address is handed to the base class below.
 char ReplaceFMULS::ID = 0;
 // Forwards the target machine and the pass ID to LEONMachineFunctionPass.
 ReplaceFMULS::ReplaceFMULS(TargetMachine &tm) :
                    LEONMachineFunctionPass(tm, ID)
 {
 }
 // Rewrites every FMULS found in MF into
 //   fstod rs1, scratch1
 //   fstod rs2, scratch2
 //   fmuld scratch1, scratch2, rd
 // An FMULS is recognised either as a real SP::FMULS machine instruction or as
 // an inline-asm statement whose (lower-cased) string starts with "fmuls".
 // An occurrence is left untouched when two free scratch registers cannot be
 // obtained from getUnusedFPRegister().
 // Returns true if at least one instruction was replaced.
 //
 // NOTE(review): FMULS writes a single-precision rd, while the replacement ends
 // with FMULD writing that same register and no conversion back to single
 // precision is emitted - confirm whether a trailing FDTOS is required.
 // NOTE(review): getUnusedFPRegister() scans SP::F0..SP::F31, yet the scratch
 // registers are used as FSTOD results / FMULD inputs here - confirm that these
 // are valid registers for double-precision operands.
 bool ReplaceFMULS::runOnMachineFunction(MachineFunction& MF)
 {
  Subtarget = &MF.getSubtarget<SparcSubtarget>();
  const TargetInstrInfo& TII = *Subtarget->getInstrInfo();
  DebugLoc DL = DebugLoc();
  const int UNASSIGNED_INDEX = -1;
  const std::string FMULSMnemonic("fmuls");

  bool Modified = false;
  for (MachineBasicBlock &MBB : MF) {
    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
      // Advance before touching the instruction: it may be erased below, and
      // the instruction that follows it must still be visited next. (Advancing
      // again after an erase would skip that instruction, or step past end().)
      const MachineBasicBlock::iterator CurI = MBBI++;
      MachineInstr &MI = *CurI;

      int Reg1Index = UNASSIGNED_INDEX; // first source  (rs1)
      int Reg2Index = UNASSIGNED_INDEX; // second source (rs2)
      int Reg3Index = UNASSIGNED_INDEX; // destination   (rd)
      if (MI.getOpcode() == SP::FMULS && MI.getNumOperands() == 3) {
        // Machine operands are ordered (outs rd), (ins rs1, rs2): the
        // destination comes first, unlike the "fmuls rs1, rs2, rd" syntax.
        Reg3Index = MI.getOperand(0).getReg();
        Reg1Index = MI.getOperand(1).getReg();
        Reg2Index = MI.getOperand(2).getReg();
      }
      else if (MI.isInlineAsm()) {
        std::string AsmString(MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName());
        // Go through unsigned char so that tolower never sees a negative value.
        std::transform(AsmString.begin(), AsmString.end(), AsmString.begin(),
                       [](unsigned char C) { return static_cast<char>(::tolower(C)); });
        if (AsmString.compare(0, FMULSMnemonic.size(), FMULSMnemonic) == 0) {
          // The first three register operands of the inline-asm statement are
          // taken, in order, as rs1, rs2 and rd.
          // NOTE(review): this assumes the asm operands appear in the same
          // order as in the assembly text - confirm against callers.
          for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands();
               i != e && Reg3Index == UNASSIGNED_INDEX; ++i) {
            const MachineOperand &MO = MI.getOperand(i);
            if (!MO.isReg())
              continue;
            if (Reg1Index == UNASSIGNED_INDEX) Reg1Index = MO.getReg();
            else if (Reg2Index == UNASSIGNED_INDEX) Reg2Index = MO.getReg();
            else Reg3Index = MO.getReg();
          }
        }
      }

      if (Reg1Index == UNASSIGNED_INDEX || Reg2Index == UNASSIGNED_INDEX ||
          Reg3Index == UNASSIGNED_INDEX)
        continue;

      clearUsedRegisterList();
      // The destination may not have been recorded as used yet, so reserve it
      // explicitly before picking scratch registers.
      markRegisterUsed(Reg3Index);
      const int ScratchReg1Index = getUnusedFPRegister(MF.getRegInfo());
      if (ScratchReg1Index == UNASSIGNED_INDEX)
        continue; // no free scratch register: leave the instruction as it is
      markRegisterUsed(ScratchReg1Index);
      const int ScratchReg2Index = getUnusedFPRegister(MF.getRegInfo());
      if (ScratchReg2Index == UNASSIGNED_INDEX)
        continue;
      markRegisterUsed(ScratchReg2Index);

      // The replacement sequence is inserted in front of the original
      // instruction; each result register is added as a definition.
      //create fstod %f20,%f0
      BuildMI(MBB, CurI, DL, TII.get(SP::FSTOD), ScratchReg1Index)
          .addReg(Reg1Index);
      //create fstod %f21,%f2
      BuildMI(MBB, CurI, DL, TII.get(SP::FSTOD), ScratchReg2Index)
          .addReg(Reg2Index);
      //create fmuld %f0,%f2,%f8
      BuildMI(MBB, CurI, DL, TII.get(SP::FMULD), Reg3Index)
          .addReg(ScratchReg1Index)
          .addReg(ScratchReg2Index);

      MI.eraseFromParent();
      Modified = true;
    }
  }
  return Modified;
 }
 //*****************************************************************************
 //**** FixAllFDIVSQRT pass
 //*****************************************************************************
 //This pass implements two fixes:
 // 1) fixing the FSQRTS and FSQRTD instructions;
 // 2) fixing the FDIVS and FDIVD instructions.
 //
 // Pass identification object; its address is handed to the base class below.
 char FixAllFDIVSQRT::ID = 0;
 // Forwards the target machine and the pass ID to LEONMachineFunctionPass.
 FixAllFDIVSQRT::FixAllFDIVSQRT(TargetMachine &tm) :
                    LEONMachineFunctionPass(tm, ID)
 {
 }
 // Surrounds every FSQRTD / FDIVD in MF with NOPs: five in front of the
 // instruction and twenty-eight behind it. Inline-asm statements whose
 // (lower-cased) string starts with "fsqrtd" or "fdivd" are treated the same
 // way. Returns true if any NOPs were inserted.
 bool FixAllFDIVSQRT::runOnMachineFunction(MachineFunction& MF)
 {
  Subtarget = &MF.getSubtarget<SparcSubtarget>();
  const TargetInstrInfo& TII = *Subtarget->getInstrInfo();
  DebugLoc DL = DebugLoc();

  bool Modified = false;
  for (MachineBasicBlock &MBB : MF) {
    for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ++MBBI) {
      MachineInstr &MI = *MBBI;
      unsigned Opcode = MI.getOpcode();

      if (MI.isInlineAsm()) {
        // Map a recognised inline-asm mnemonic onto the matching opcode.
        std::string AsmString(MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName());
        std::transform(AsmString.begin(), AsmString.end(), AsmString.begin(), ::tolower);
        if (AsmString.find("fsqrtd") == 0)
          Opcode = SP::FSQRTD; // inline fsqrtd
        else if (AsmString.find("fdivd") == 0)
          Opcode = SP::FDIVD; // inline fdivd
      }

      // FDIVS and FSQRTS are not looked for: with this erratum fix enabled they
      // are expected to have been promoted to FDIVD / FSQRTD earlier in the
      // pipeline.
      if (Opcode != SP::FSQRTD && Opcode != SP::FDIVD)
        continue;

      // Five NOPs ahead of the instruction...
      for (int Count = 0; Count != 5; ++Count)
        BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
      // ...and twenty-eight NOPs directly after it.
      const MachineBasicBlock::iterator AfterMI = std::next(MBBI);
      for (int Count = 0; Count != 28; ++Count)
        BuildMI(MBB, AfterMI, DL, TII.get(SP::NOP));
      Modified = true;
    }
  }
  return Modified;
 }
--- a/lib/Target/Sparc/LeonPasses.h
+++ b/lib/Target/Sparc/LeonPasses.h
@ -25,10 +25,19 @@ class LLVM_LIBRARY_VISIBILITY LEONMachineFunctionPass
    : public MachineFunctionPass {
 protected:
  // Subtarget of the function being processed; the derived passes assign it
  // at the start of runOnMachineFunction.
  const SparcSubtarget *Subtarget;
  // Pseudo operand index accepted by GetRegIndexForOperand meaning "the last operand".
  const int LAST_OPERAND = -1;
  // Registers that have been reserved via markRegisterUsed() and must not be
  // returned by getUnusedFPRegister(); emptied by clearUsedRegisterList().
  std::vector <int> UsedRegisters;
 protected:
  LEONMachineFunctionPass(TargetMachine &tm, char& ID);
  LEONMachineFunctionPass(char& ID);
  // Register number of the given operand of MI, or a negative value if there is none.
  int GetRegIndexForOperand(MachineInstr& MI, int OperandIndex);
  // Empties UsedRegisters.
  void clearUsedRegisterList();
  // Appends registerIndex to UsedRegisters.
  void markRegisterUsed(int registerIndex);
  // First register in SP::F0..SP::F31 that is neither used according to MRI
  // nor listed in UsedRegisters; -1 if there is none.
  int getUnusedFPRegister(MachineRegisterInfo& MRI);
 };
 class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass {
@ -42,6 +51,42 @@ public:
    return "InsertNOPLoad: Erratum Fix LBR35: insert a NOP instruction after every single-cycle load instruction when the next instruction is another load/store instruction";
  }
 };
 // LEON erratum pass (LBR31 according to its pass name) that deals with the
 // FSMULD instruction; the transformation lives in runOnMachineFunction.
 class LLVM_LIBRARY_VISIBILITY FixFSMULD : public LEONMachineFunctionPass {
 public:
  // Pass identification object.
  static char ID;
  FixFSMULD(TargetMachine &tm);
  // Returns true when the function was modified.
  bool runOnMachineFunction(MachineFunction& MF) override;
  const char *getPassName() const override {
    return "FixFSMULD: Erratum Fix LBR31: do not select FSMULD";
  }
 };
 // LEON erratum pass (LBR32 according to its pass name) that deals with the
 // FMULS instruction; the transformation lives in runOnMachineFunction.
 class LLVM_LIBRARY_VISIBILITY ReplaceFMULS : public LEONMachineFunctionPass {
 public:
  // Pass identification object.
  static char ID;
  ReplaceFMULS(TargetMachine &tm);
  // Returns true when the function was modified.
  bool runOnMachineFunction(MachineFunction& MF) override;
  const char *getPassName() const override {
    return "ReplaceFMULS: Erratum Fix LBR32: replace FMULS instruction with a routine using conversions/double precision operations to replace FMULS";
  }
 };
 // LEON erratum pass (LBR34 according to its pass name) that deals with the
 // floating-point divide and square-root instructions; the transformation
 // lives in runOnMachineFunction.
 class LLVM_LIBRARY_VISIBILITY FixAllFDIVSQRT : public LEONMachineFunctionPass {
 public:
  // Pass identification object.
  static char ID;
  FixAllFDIVSQRT(TargetMachine &tm);
  // Returns true when the function was modified.
  bool runOnMachineFunction(MachineFunction& MF) override;
  const char *getPassName() const override {
    return "FixAllFDIVSQRT: Erratum Fix LBR34: fix FDIVS/FDIVD/FSQRTS/FSQRTD instructions with NOPs and floating-point store";
  }
 };
 } // namespace llvm
 #endif
--- a/lib/Target/Sparc/Sparc.td
+++ b/lib/Target/Sparc/Sparc.td
@ -122,7 +122,7 @@ def : Processor<"leon3", LEON3Itineraries,
 // LEON 3 FT (UT699). Provides features for the UT699 processor
 // - covers all the erratum fixes for LEON3, but does not support the CASA instruction.
 def : Processor<"ut699", LEON3Itineraries, 
-                [FeatureLeon, InsertNOPLoad]>;
+                [FeatureLeon, InsertNOPLoad, FixFSMULD, ReplaceFMULS, FixAllFDIVSQRT]>;
 // LEON3 FT (GR712RC). Provides features for the GR712RC processor.
 // - covers all the erratum fixed for LEON3 and support for the CASA instruction. 
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@ -1824,6 +1824,19 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
    }
  }
  if (Subtarget->fixAllFDIVSQRT()) {
    // Promote FDIVS and FSQRTS to FDIVD and FSQRTD instructions instead as
    // the former instructions generate errata on LEON processors.
    setOperationAction(ISD::FDIV, MVT::f32, Promote);
    setOperationAction(ISD::FSQRT, MVT::f32, Promote);
  }
  if (Subtarget->replaceFMULS()) {
    // Promote FMULS to FMULD instructions instead as
    // the former instructions generate errata on LEON processors.
    setOperationAction(ISD::FMUL, MVT::f32, Promote);
  }
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setMinFunctionAlignment(2);
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@ -57,6 +57,10 @@ def HasLeonCASA : Predicate<"Subtarget->hasLeonCasa()">;
 // UMAC and SMAC instructions
 def HasUMAC_SMAC : Predicate<"Subtarget->hasUmacSmac()">;
 // Each predicate below holds when the corresponding LEON erratum fix is NOT
 // enabled on the subtarget (note the negation in the condition string).
 def HasNoFdivSqrtFix : Predicate<"!Subtarget->fixAllFDIVSQRT()">;
 def HasNoFmulsFix : Predicate<"!Subtarget->replaceFMULS()">;
 def HasNoFsmuldFix : Predicate<"!Subtarget->fixFSMULD()">;
 // UseDeprecatedInsts - This predicate is true when the target processor is a
 // V8, or when it is V9 but the V8 deprecated instructions are efficient enough
 // to use when appropriate.  In either of these cases, the instruction selector
@ -1172,6 +1176,9 @@ def FABSS : F3_3u<2, 0b110100, 0b000001001,
 // Floating-point Square Root Instructions, p.145
 // FSQRTS generates an erratum on LEON processors, so by disabling this instruction
 // this will be promoted to use FSQRTD with doubles instead.
 let Predicates = [HasNoFdivSqrtFix] in 
 def FSQRTS : F3_3u<2, 0b110100, 0b000101001,
                  (outs FPRegs:$rd), (ins FPRegs:$rs2),
                  "fsqrts $rs2, $rd",
@ -1225,6 +1232,9 @@ def FSUBQ  : F3_3<2, 0b110100, 0b001000111,
 // Floating-point Multiply and Divide Instructions, p. 147
 // FMULS generates an erratum on LEON processors, so by disabling this instruction
 // this will be promoted to use FMULD with doubles instead.
 let Predicates = [HasNoFmulsFix] in 
 def FMULS  : F3_3<2, 0b110100, 0b001001001,
                  (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
                  "fmuls $rs1, $rs2, $rd",
@ -1241,6 +1251,7 @@ def FMULQ  : F3_3<2, 0b110100, 0b001001011,
                  [(set f128:$rd, (fmul f128:$rs1, f128:$rs2))]>,
                  Requires<[HasHardQuad]>;
 let Predicates = [HasNoFsmuldFix] in
 def FSMULD : F3_3<2, 0b110100, 0b001101001,
                  (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
                  "fsmuld $rs1, $rs2, $rd",
@ -1254,6 +1265,8 @@ def FDMULQ : F3_3<2, 0b110100, 0b001101110,
                                         (fextend f64:$rs2)))]>,
                  Requires<[HasHardQuad]>;
 // FDIVS generates an erratum on LEON processors, so by disabling this instruction
 // this will be promoted to use FDIVD with doubles instead.
 def FDIVS  : F3_3<2, 0b110100, 0b001001101,
                 (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
                 "fdivs $rs1, $rs2, $rd",
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@ -40,6 +40,9 @@ SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
  HasLeonCasa = false;
  HasUmacSmac = false;
  InsertNOPLoad = false;
  FixFSMULD = false;
  ReplaceFMULS = false;
  FixAllFDIVSQRT = false;
  // Determine default and user specified characteristics
  std::string CPUName = CPU;
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@ -45,6 +45,9 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
  bool HasUmacSmac;
  bool HasLeonCasa;
  bool InsertNOPLoad;
  bool FixFSMULD;
  bool ReplaceFMULS;
  bool FixAllFDIVSQRT;
  SparcInstrInfo InstrInfo;
  SparcTargetLowering TLInfo;
@ -85,6 +88,9 @@ public:
  bool hasUmacSmac() const { return HasUmacSmac; }
  bool hasLeonCasa() const { return HasLeonCasa; }
  bool insertNOPLoad() const { return InsertNOPLoad; }
  bool fixFSMULD() const { return FixFSMULD; }
  bool replaceFMULS() const { return ReplaceFMULS; }
  bool fixAllFDIVSQRT() const { return FixAllFDIVSQRT; }
  /// ParseSubtargetFeatures - Parses features string setting specified
  /// subtarget options.  Definition of function is auto generated by tblgen.
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@ -149,6 +149,18 @@ void SparcPassConfig::addPreEmitPass(){
  {
    addPass(new InsertNOPLoad(getSparcTargetMachine()));
  }
  if (this->getSparcTargetMachine().getSubtargetImpl()->fixFSMULD())
  {
    addPass(new FixFSMULD(getSparcTargetMachine()));
  }
  if (this->getSparcTargetMachine().getSubtargetImpl()->replaceFMULS())
  {
    addPass(new ReplaceFMULS(getSparcTargetMachine()));
  }
  if (this->getSparcTargetMachine().getSubtargetImpl()->fixAllFDIVSQRT())
  {
    addPass(new FixAllFDIVSQRT(getSparcTargetMachine()));
  }
 }
 void SparcV8TargetMachine::anchor() { }
--- a/test/CodeGen/SPARC/LeonFixFSMULDPassUT.ll
+++ b/test/CodeGen/SPARC/LeonFixFSMULDPassUT.ll
@ -0,0 +1,26 @@
 ; RUN: llc %s -O0 -march=sparc -mcpu=ut699 -o - | FileCheck %s
 ; CHECK-LABEL: test_fix_fsmuld_1
 ; CHECK:       fsmuld %f20, %f21, %f8
 ; Stores two float constants into stack slots, then issues an inline-asm
 ; fsmuld whose operands are pinned to f20, f21 and f8 by the constraint string.
 ; NOTE(review): the asm call receives the float* allocas rather than the loaded
 ; values %0 and %1, which are otherwise unused - confirm this is intended.
 define double @test_fix_fsmuld_1() {
 entry:
  %a = alloca float, align 4
  %b = alloca float, align 4
  store float 0x402ECCCCC0000000, float* %a, align 4
  store float 0x4022333340000000, float* %b, align 4
  %0 = load float, float* %b, align 4
  %1 = load float, float* %a, align 4
  %mul = tail call double asm sideeffect "fsmuld $0, $1, $2", "={f20},{f21},{f8}"(float* %a, float* %b)
  ret double %mul
 }
 ; CHECK-LABEL: test_fix_fsmuld_2
 ; CHECK:       fsmuld %f20, %f21, %f8
 ; Same inline-asm fsmuld as the first test, but with the two pointer operands
 ; taken directly from the function arguments.
 define double @test_fix_fsmuld_2(float* %a, float* %b) {
 entry:
  %mul = tail call double asm sideeffect "fsmuld $0, $1, $2", "={f20},{f21},{f8}"(float* %a, float* %b)
  ret double %mul
 }
--- a/test/CodeGen/SPARC/LeonReplaceFMULSPassUT.ll
+++ b/test/CodeGen/SPARC/LeonReplaceFMULSPassUT.ll
@ -0,0 +1,16 @@
 ; RUN: llc %s -O0 -march=sparc -mcpu=ut699 -o - | FileCheck %s
 ; CHECK-LABEL: fmuls_fix_test
 ; CHECK:       fmuls %f20, %f21, %f8
 ; Stores two float constants into stack slots, then issues an inline-asm fmuls
 ; whose operands are pinned to f20, f21 and f8 by the constraint string.
 ; NOTE(review): the asm call receives the float* allocas rather than the loaded
 ; values %0 and %1, which are otherwise unused - confirm this is intended.
 define double @fmuls_fix_test() {
 entry:
  %a = alloca float, align 4
  %b = alloca float, align 4
  store float 0x402ECCCCC0000000, float* %a, align 4
  store float 0x4022333340000000, float* %b, align 4
  %0 = load float, float* %b, align 4
  %1 = load float, float* %a, align 4
  %mul = tail call double asm sideeffect "fmuls $0, $1, $2", "={f20},{f21},{f8}"(float* %a, float* %b)
  ret double %mul
 }
--- a/test/CodeGen/SPARC/LeonSMACUMACInstructionUT.ll
+++ b/test/CodeGen/SPARC/LeonSMACUMACInstructionUT.ll
@ -0,0 +1,20 @@
 ; RUN: llc %s -O0 -march=sparc -mcpu=leon2 -o - | FileCheck %s
 ; RUN: llc %s -O0 -march=sparc -mcpu=leon3 -o - | FileCheck %s
 ; RUN: llc %s -O0 -march=sparc -mcpu=leon4 -o - | FileCheck %s
 ; CHECK-LABEL: smac_test:
 ; CHECK:       smac %o1, %o0, %o0
 ; Emits an inline-asm smac using generic register constraints and returns its
 ; i32 result. The commented-out call below is an alternative that pins the
 ; operands to fixed registers.
 define i32 @smac_test(i16* %a, i16* %b) {
 entry:
 ;  %0 = tail call i32 asm sideeffect "smac $2, $1, $0", "={r2},{r3},{r4}"(i16* %a, i16* %b)
  %0 = tail call i32 asm sideeffect "smac $2, $1, $0", "=r,rI,r"(i16* %a, i16* %b)
  ret i32 %0
 }
 ; CHECK-LABEL: umac_test:
 ; CHECK:       umac %o1, %o0, %o0
 ; Emits an inline-asm umac using generic register constraints and returns its
 ; i32 result.
 define i32 @umac_test(i16* %a, i16* %b) {
 entry:
  %0 = tail call i32 asm sideeffect "umac $2, $1, $0", "=r,rI,r"(i16* %a, i16* %b)
  ret i32 %0
 }