[SPARC] Fixes for hardware errata on LEON processor.

Passes to fix three hardware errata that appear on some LEON processor variants.

The instructions FSMULD, FMULS and FDIVS do not work as expected on some LEON processors. This change allows those instructions to be replaced with alternative instruction sequences that are known to work.

These passes only run when selected individually, or as part of a processor definition. They are not included in general SPARC processor compilations for non-LEON processors or for those LEON processors that do not have these hardware errata.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273108 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Dewhurst 2016-06-19 11:03:28 +00:00
parent 099f7f2187
commit 2ac3a338b2
12 changed files with 511 additions and 1 deletions

View File

@ -43,3 +43,24 @@ def InsertNOPLoad: SubtargetFeature<
"true", "true",
"LEON3 erratum fix: Insert a NOP instruction after every single-cycle load instruction when the next instruction is another load/store instruction" "LEON3 erratum fix: Insert a NOP instruction after every single-cycle load instruction when the next instruction is another load/store instruction"
>; >;
// Subtarget feature "fixfsmuld": sets the FixFSMULD subtarget attribute to
// "true" when enabled.
def FixFSMULD : SubtargetFeature<
"fixfsmuld",
"FixFSMULD",
"true",
"LEON erratum fix: Do not use FSMULD"
>;
// Subtarget feature "replacefmuls": sets the ReplaceFMULS subtarget attribute
// to "true" when enabled.
def ReplaceFMULS : SubtargetFeature<
"replacefmuls",
"ReplaceFMULS",
"true",
"LEON erratum fix: Replace FMULS instruction with FMULD and relevant conversion instructions"
>;
// Subtarget feature "fixallfdivsqrt": sets the FixAllFDIVSQRT subtarget
// attribute to "true" when enabled.
def FixAllFDIVSQRT : SubtargetFeature<
"fixallfdivsqrt",
"FixAllFDIVSQRT",
"true",
"LEON erratum fix: Fix FDIVS/FDIVD/FSQRTS/FSQRTD instructions with NOPs and floating-point store"
>;

View File

@ -30,6 +30,51 @@ LEONMachineFunctionPass::LEONMachineFunctionPass(char& ID) :
{ {
} }
// Returns the register number stored in operand `OperandIndex` of `MI`.
// Passing LAST_OPERAND selects the final operand of the instruction.
// If the instruction has no operands, the index is out of range, or the
// selected operand is not a register, a negative "not found" value is
// returned instead.
int LEONMachineFunctionPass::GetRegIndexForOperand(MachineInstr& MI, int OperandIndex)
{
  const unsigned OperandCount = MI.getNumOperands();
  if (OperandCount != 0) {
    // A negative index other than LAST_OPERAND becomes a huge unsigned value
    // here and is rejected by the range check below.
    const unsigned Resolved = (OperandIndex == LAST_OPERAND)
                                  ? OperandCount - 1
                                  : static_cast<unsigned>(OperandIndex);
    if (Resolved < OperandCount) {
      const auto &Operand = MI.getOperand(Resolved);
      if (Operand.isReg())
        return static_cast<int>(Operand.getReg());
    }
  }

  // Each miss hands out a different value (-20, -30, ...) so that the results
  // of two failed lookups never compare equal to each other.
  static int NotFoundIndex = -10;
  NotFoundIndex -= 10;
  return NotFoundIndex;
}
// Forgets every register previously recorded with markRegisterUsed().
void LEONMachineFunctionPass::clearUsedRegisterList() {
  UsedRegisters.clear();
}
// Records `registerIndex` in the UsedRegisters list so that
// getUnusedFPRegister() will not hand it out.
void LEONMachineFunctionPass::markRegisterUsed(int registerIndex) {
  UsedRegisters.emplace_back(registerIndex);
}
// Finds a free FP register in the range SP::F0..SP::F31.
// Candidates are scanned in ascending order; the first one that `MRI` does not
// report as used and that is not present in the UsedRegisters list is
// returned. Returns -1 when every candidate is taken.
int LEONMachineFunctionPass::getUnusedFPRegister(MachineRegisterInfo& MRI)
{
  for (int Candidate = SP::F0; Candidate <= SP::F31; ++Candidate) {
    if (MRI.isPhysRegUsed(Candidate))
      continue;

    const bool AlreadyReserved =
        std::find(UsedRegisters.begin(), UsedRegisters.end(), Candidate) != UsedRegisters.end();
    if (AlreadyReserved)
      continue;

    return Candidate;
  }

  return -1;
}
//***************************************************************************** //*****************************************************************************
//**** InsertNOPLoad pass //**** InsertNOPLoad pass
//***************************************************************************** //*****************************************************************************
@ -76,3 +121,293 @@ bool InsertNOPLoad::runOnMachineFunction(MachineFunction& MF)
return Modified; return Modified;
} }
//*****************************************************************************
//**** FixFSMULD pass
//*****************************************************************************
//this pass should convert the FSMULD operands to double precision in scratch registers,
//then calculate the result with the FMULD instruction. Therefore, the pass should replace operations of the form:
//fsmuld %f20,%f21,%f8
//with the sequence:
//fstod %f20,%f0
//fstod %f21,%f2
//fmuld %f0,%f2,%f8
//
char FixFSMULD::ID = 0;
// Constructs the pass, forwarding the target machine and this pass's ID to
// the LEON base class.
FixFSMULD::FixFSMULD(TargetMachine &tm)
    : LEONMachineFunctionPass(tm, ID) {}
// Replaces FSMULD with two FSTOD conversions into scratch FP registers
// followed by an FMULD, then erases the original instruction.
//
// Two forms are recognised:
//  * a machine instruction with opcode SP::FSMULD and exactly three operands;
//  * an inline-asm instruction whose lower-cased asm string starts with
//    "fsmuld", using its first three register operands in order.
//
// If two free scratch registers cannot be found the instruction is left
// untouched. Returns true when at least one instruction was replaced.
//
// NOTE(review): the first register operand is treated as a source and the
// third as the destination, mirroring the assembly text order. Confirm this
// matches the MachineInstr operand order for SP::FSMULD, whose definition
// lists $rd as its only output.
// NOTE(review): the scratch registers come from SP::F0..SP::F31 but are used
// as FSTOD/FMULD operands; confirm the register class is appropriate.
bool FixFSMULD::runOnMachineFunction(MachineFunction& MF)
{
  Subtarget = &MF.getSubtarget<SparcSubtarget>();
  const TargetInstrInfo& TII = *Subtarget->getInstrInfo();
  DebugLoc DL = DebugLoc();

  bool Modified = false;
  for (auto MFI = MF.begin(), MFE = MF.end(); MFI != MFE; ++MFI) {
    MachineBasicBlock &MBB = *MFI;

    // The iterator is advanced at the bottom of the body, not in the loop
    // header: the current instruction may be erased, and advancing a second
    // time afterwards would skip the following instruction or step past end().
    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
      MachineInstr &MI = *MBBI;
      // Captured before any modification so it stays valid after MI is erased.
      MachineBasicBlock::iterator NMBBI = std::next(MBBI);
      unsigned Opcode = MI.getOpcode();

      const int UNASSIGNED_INDEX = -1;
      int Reg1Index = UNASSIGNED_INDEX;
      int Reg2Index = UNASSIGNED_INDEX;
      int Reg3Index = UNASSIGNED_INDEX;

      if (Opcode == SP::FSMULD && MI.getNumOperands() == 3) {
        //take the registers from fsmuld %f20,%f21,%f8
        Reg1Index = MI.getOperand(0).getReg();
        Reg2Index = MI.getOperand(1).getReg();
        Reg3Index = MI.getOperand(2).getReg();
      }
      else if (MI.isInlineAsm()) {
        std::string AsmString(MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName());
        const std::string FSMULDOpCode("fsmuld");
        // tolower() is only defined for values representable as unsigned char.
        std::transform(AsmString.begin(), AsmString.end(), AsmString.begin(),
                       [](unsigned char C) { return static_cast<char>(::tolower(C)); });
        if (AsmString.compare(0, FSMULDOpCode.size(), FSMULDOpCode) == 0) { // this is an inline FSMULD instruction
          //extracts the first three registers from the inline assembly instruction
          for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands(); i != e; ++i) {
            const MachineOperand &MO = MI.getOperand(i);
            if (MO.isReg()) {
              if (Reg1Index == UNASSIGNED_INDEX) Reg1Index = MO.getReg();
              else if (Reg2Index == UNASSIGNED_INDEX) Reg2Index = MO.getReg();
              else if (Reg3Index == UNASSIGNED_INDEX) Reg3Index = MO.getReg();
            }
            if (Reg3Index != UNASSIGNED_INDEX)
              break;
          }
        }
      }

      if (Reg1Index != UNASSIGNED_INDEX && Reg2Index != UNASSIGNED_INDEX && Reg3Index != UNASSIGNED_INDEX) {
        clearUsedRegisterList();
        //Whatever Reg3Index is hasn't been used yet, so we need to reserve it.
        markRegisterUsed(Reg3Index);
        const int ScratchReg1Index = getUnusedFPRegister(MF.getRegInfo());
        markRegisterUsed(ScratchReg1Index);
        const int ScratchReg2Index = getUnusedFPRegister(MF.getRegInfo());
        markRegisterUsed(ScratchReg2Index);

        // Without two scratch registers the instruction is left as it is.
        if (ScratchReg1Index != UNASSIGNED_INDEX && ScratchReg2Index != UNASSIGNED_INDEX) {
          //create fstod %f20,%f0
          BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
              .addReg(ScratchReg1Index)
              .addReg(Reg1Index);

          //create fstod %f21,%f2
          BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
              .addReg(ScratchReg2Index)
              .addReg(Reg2Index);

          //create fmuld %f0,%f2,%f8
          BuildMI(MBB, MBBI, DL, TII.get(SP::FMULD))
              .addReg(Reg3Index)
              .addReg(ScratchReg1Index)
              .addReg(ScratchReg2Index);

          MI.eraseFromParent();
          Modified = true;
        }
      }

      MBBI = NMBBI;
    }
  }

  return Modified;
}
//*****************************************************************************
//**** ReplaceFMULS pass
//*****************************************************************************
//This pass converts the FMULS operands to double precision in scratch registers,
//then calculates the result with the FMULD instruction.
//The pass should replace operations of the form:
//fmuls %f20,%f21,%f8
//with the sequence:
//fstod %f20,%f0
//fstod %f21,%f2
//fmuld %f0,%f2,%f8
//
char ReplaceFMULS::ID = 0;
// Constructs the pass, forwarding the target machine and this pass's ID to
// the LEON base class.
ReplaceFMULS::ReplaceFMULS(TargetMachine &tm)
    : LEONMachineFunctionPass(tm, ID) {}
// Replaces FMULS with two FSTOD conversions into scratch FP registers
// followed by an FMULD, then erases the original instruction.
//
// Two forms are recognised:
//  * a machine instruction with opcode SP::FMULS and exactly three operands;
//  * an inline-asm instruction whose lower-cased asm string starts with
//    "fmuls", using its first three register operands in order.
//
// If two free scratch registers cannot be found the instruction is left
// untouched. Returns true when at least one instruction was replaced.
//
// NOTE(review): the first register operand is treated as a source and the
// third as the destination, mirroring the assembly text order. Confirm this
// matches the MachineInstr operand order for SP::FMULS, whose definition
// lists $rd as its only output.
// NOTE(review): the emitted FMULD writes its result straight into the
// register that FMULS targeted; confirm whether the result needs converting
// back to single precision.
bool ReplaceFMULS::runOnMachineFunction(MachineFunction& MF)
{
  Subtarget = &MF.getSubtarget<SparcSubtarget>();
  const TargetInstrInfo& TII = *Subtarget->getInstrInfo();
  DebugLoc DL = DebugLoc();

  bool Modified = false;
  for (auto MFI = MF.begin(), MFE = MF.end(); MFI != MFE; ++MFI) {
    MachineBasicBlock &MBB = *MFI;

    // The iterator is advanced at the bottom of the body, not in the loop
    // header: the current instruction may be erased, and advancing a second
    // time afterwards would skip the following instruction or step past end().
    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
      MachineInstr &MI = *MBBI;
      // Captured before any modification so it stays valid after MI is erased.
      MachineBasicBlock::iterator NMBBI = std::next(MBBI);
      unsigned Opcode = MI.getOpcode();

      const int UNASSIGNED_INDEX = -1;
      int Reg1Index = UNASSIGNED_INDEX;
      int Reg2Index = UNASSIGNED_INDEX;
      int Reg3Index = UNASSIGNED_INDEX;

      if (Opcode == SP::FMULS && MI.getNumOperands() == 3) {
        //take the registers from fmuls %f20,%f21,%f8
        Reg1Index = MI.getOperand(0).getReg();
        Reg2Index = MI.getOperand(1).getReg();
        Reg3Index = MI.getOperand(2).getReg();
      }
      else if (MI.isInlineAsm()) {
        std::string AsmString(MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName());
        const std::string FMULSOpCode("fmuls");
        // tolower() is only defined for values representable as unsigned char.
        std::transform(AsmString.begin(), AsmString.end(), AsmString.begin(),
                       [](unsigned char C) { return static_cast<char>(::tolower(C)); });
        if (AsmString.compare(0, FMULSOpCode.size(), FMULSOpCode) == 0) { // this is an inline FMULS instruction
          //extracts the first three registers from the inline assembly instruction
          for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands(); i != e; ++i) {
            const MachineOperand &MO = MI.getOperand(i);
            if (MO.isReg()) {
              if (Reg1Index == UNASSIGNED_INDEX) Reg1Index = MO.getReg();
              else if (Reg2Index == UNASSIGNED_INDEX) Reg2Index = MO.getReg();
              else if (Reg3Index == UNASSIGNED_INDEX) Reg3Index = MO.getReg();
            }
            if (Reg3Index != UNASSIGNED_INDEX)
              break;
          }
        }
      }

      if (Reg1Index != UNASSIGNED_INDEX && Reg2Index != UNASSIGNED_INDEX && Reg3Index != UNASSIGNED_INDEX) {
        clearUsedRegisterList();
        //Whatever Reg3Index is hasn't been used yet, so we need to reserve it.
        markRegisterUsed(Reg3Index);
        const int ScratchReg1Index = getUnusedFPRegister(MF.getRegInfo());
        markRegisterUsed(ScratchReg1Index);
        const int ScratchReg2Index = getUnusedFPRegister(MF.getRegInfo());
        markRegisterUsed(ScratchReg2Index);

        // Without two scratch registers the instruction is left as it is.
        if (ScratchReg1Index != UNASSIGNED_INDEX && ScratchReg2Index != UNASSIGNED_INDEX) {
          //create fstod %f20,%f0
          BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
              .addReg(ScratchReg1Index)
              .addReg(Reg1Index);

          //create fstod %f21,%f2
          BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
              .addReg(ScratchReg2Index)
              .addReg(Reg2Index);

          //create fmuld %f0,%f2,%f8
          BuildMI(MBB, MBBI, DL, TII.get(SP::FMULD))
              .addReg(Reg3Index)
              .addReg(ScratchReg1Index)
              .addReg(ScratchReg2Index);

          MI.eraseFromParent();
          Modified = true;
        }
      }

      MBBI = NMBBI;
    }
  }

  return Modified;
}
//*****************************************************************************
//**** FixAllFDIVSQRT pass
//*****************************************************************************
//This pass implements two fixes:
// 1) fixing the FSQRTS and FSQRTD instructions;
// 2) fixing the FDIVS and FDIVD instructions.
//
char FixAllFDIVSQRT::ID = 0;
// Constructs the pass, forwarding the target machine and this pass's ID to
// the LEON base class.
FixAllFDIVSQRT::FixAllFDIVSQRT(TargetMachine &tm)
    : LEONMachineFunctionPass(tm, ID) {}
// Surrounds every FSQRTD and FDIVD in MF with NOPs: 5 immediately before the
// instruction and 28 immediately after it.
//
// Inline-asm instructions are matched by text: an asm string that, once
// lower-cased, starts with "fsqrtd" or "fdivd" is handled as the
// corresponding opcode. Returns true when any NOPs were inserted.
bool FixAllFDIVSQRT::runOnMachineFunction(MachineFunction& MF)
{
  Subtarget = &MF.getSubtarget<SparcSubtarget>();
  const TargetInstrInfo& TII = *Subtarget->getInstrInfo();
  DebugLoc DL = DebugLoc();

  bool Modified = false;
  for (MachineBasicBlock &MBB : MF) {
    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
      MachineInstr &MI = *MBBI;
      unsigned Opcode = MI.getOpcode();

      if (MI.isInlineAsm()) {
        std::string AsmString(MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName());
        std::transform(AsmString.begin(), AsmString.end(), AsmString.begin(), ::tolower);

        if (AsmString.find("fsqrtd") == 0) {
          // this is an inline fsqrtd instruction
          Opcode = SP::FSQRTD;
        }
        else if (AsmString.find("fdivd") == 0) {
          // this is an inline fdivd instruction
          Opcode = SP::FDIVD;
        }
      }

      // Note: FDIVS and FSQRTS cannot be generated when this erratum fix is switched on
      // so we don't need to check for them here. They will already have been converted
      // to FSQRTD or FDIVD earlier in the pipeline.
      if (Opcode != SP::FSQRTD && Opcode != SP::FDIVD)
        continue;

      // 5 NOPs in front of the instruction.
      for (int Remaining = 5; Remaining > 0; --Remaining)
        BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));

      // 28 NOPs behind it, inserted ahead of the instruction's successor.
      MachineBasicBlock::iterator NMBBI = std::next(MBBI);
      for (int Remaining = 28; Remaining > 0; --Remaining)
        BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));

      Modified = true;
    }
  }

  return Modified;
}

View File

@ -25,10 +25,19 @@ class LLVM_LIBRARY_VISIBILITY LEONMachineFunctionPass
: public MachineFunctionPass { : public MachineFunctionPass {
protected: protected:
const SparcSubtarget *Subtarget; const SparcSubtarget *Subtarget;
const int LAST_OPERAND = -1;
//this vector holds free registers that we allocate in groups for some of the LEON passes
std::vector <int> UsedRegisters;
protected: protected:
LEONMachineFunctionPass(TargetMachine &tm, char& ID); LEONMachineFunctionPass(TargetMachine &tm, char& ID);
LEONMachineFunctionPass(char& ID); LEONMachineFunctionPass(char& ID);
int GetRegIndexForOperand(MachineInstr& MI, int OperandIndex);
void clearUsedRegisterList();
void markRegisterUsed(int registerIndex);
int getUnusedFPRegister(MachineRegisterInfo& MRI);
}; };
class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass { class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass {
@ -42,6 +51,42 @@ public:
return "InsertNOPLoad: Erratum Fix LBR35: insert a NOP instruction after every single-cycle load instruction when the next instruction is another load/store instruction"; return "InsertNOPLoad: Erratum Fix LBR35: insert a NOP instruction after every single-cycle load instruction when the next instruction is another load/store instruction";
} }
}; };
// Machine-function pass for the FSMULD erratum workaround; see
// runOnMachineFunction() for the rewrite it performs.
class LLVM_LIBRARY_VISIBILITY FixFSMULD : public LEONMachineFunctionPass {
public:
  static char ID;

  FixFSMULD(TargetMachine &tm);

  // Human-readable name of the pass.
  const char *getPassName() const override {
    return "FixFSMULD: Erratum Fix LBR31: do not select FSMULD";
  }

  bool runOnMachineFunction(MachineFunction& MF) override;
};
// Machine-function pass for the FMULS erratum workaround; see
// runOnMachineFunction() for the rewrite it performs.
class LLVM_LIBRARY_VISIBILITY ReplaceFMULS : public LEONMachineFunctionPass {
public:
  static char ID;

  ReplaceFMULS(TargetMachine &tm);

  // Human-readable name of the pass.
  const char *getPassName() const override {
    return "ReplaceFMULS: Erratum Fix LBR32: replace FMULS instruction with a routine using conversions/double precision operations to replace FMULS";
  }

  bool runOnMachineFunction(MachineFunction& MF) override;
};
// Machine-function pass for the FDIV/FSQRT erratum workaround; see
// runOnMachineFunction() for the rewrite it performs.
class LLVM_LIBRARY_VISIBILITY FixAllFDIVSQRT : public LEONMachineFunctionPass {
public:
  static char ID;

  FixAllFDIVSQRT(TargetMachine &tm);

  // Human-readable name of the pass.
  const char *getPassName() const override {
    return "FixAllFDIVSQRT: Erratum Fix LBR34: fix FDIVS/FDIVD/FSQRTS/FSQRTD instructions with NOPs and floating-point store";
  }

  bool runOnMachineFunction(MachineFunction& MF) override;
};
} // namespace llvm } // namespace llvm
#endif #endif

View File

@ -122,7 +122,7 @@ def : Processor<"leon3", LEON3Itineraries,
// LEON 3 FT (UT699). Provides features for the UT699 processor // LEON 3 FT (UT699). Provides features for the UT699 processor
// - covers all the erratum fixes for LEON3, but does not support the CASA instruction. // - covers all the erratum fixes for LEON3, but does not support the CASA instruction.
def : Processor<"ut699", LEON3Itineraries, def : Processor<"ut699", LEON3Itineraries,
[FeatureLeon, InsertNOPLoad]>; [FeatureLeon, InsertNOPLoad, FixFSMULD, ReplaceFMULS, FixAllFDIVSQRT]>;
// LEON3 FT (GR712RC). Provides features for the GR712RC processor. // LEON3 FT (GR712RC). Provides features for the GR712RC processor.
// - covers all the erratum fixed for LEON3 and support for the CASA instruction. // - covers all the erratum fixed for LEON3 and support for the CASA instruction.

View File

@ -1824,6 +1824,19 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
} }
} }
if (Subtarget->fixAllFDIVSQRT()) {
// Promote FDIVS and FSQRTS to FDIVD and FSQRTD instructions instead as
// the former instructions generate errata on LEON processors.
setOperationAction(ISD::FDIV, MVT::f32, Promote);
setOperationAction(ISD::FSQRT, MVT::f32, Promote);
}
if (Subtarget->replaceFMULS()) {
// Promote FMULS to FMULD instructions instead as
// the former instructions generate errata on LEON processors.
setOperationAction(ISD::FMUL, MVT::f32, Promote);
}
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setMinFunctionAlignment(2); setMinFunctionAlignment(2);

View File

@ -57,6 +57,10 @@ def HasLeonCASA : Predicate<"Subtarget->hasLeonCasa()">;
// UMAC and SMAC instructions // UMAC and SMAC instructions
def HasUMAC_SMAC : Predicate<"Subtarget->hasUmacSmac()">; def HasUMAC_SMAC : Predicate<"Subtarget->hasUmacSmac()">;
// Predicates that hold only when the corresponding LEON erratum fix is NOT
// enabled on the subtarget (each one negates the subtarget query).
def HasNoFdivSqrtFix : Predicate<"!Subtarget->fixAllFDIVSQRT()">;
def HasNoFmulsFix : Predicate<"!Subtarget->replaceFMULS()">;
def HasNoFsmuldFix : Predicate<"!Subtarget->fixFSMULD()">;
// UseDeprecatedInsts - This predicate is true when the target processor is a // UseDeprecatedInsts - This predicate is true when the target processor is a
// V8, or when it is V9 but the V8 deprecated instructions are efficient enough // V8, or when it is V9 but the V8 deprecated instructions are efficient enough
// to use when appropriate. In either of these cases, the instruction selector // to use when appropriate. In either of these cases, the instruction selector
@ -1172,6 +1176,9 @@ def FABSS : F3_3u<2, 0b110100, 0b000001001,
// Floating-point Square Root Instructions, p.145 // Floating-point Square Root Instructions, p.145
// FSQRTS generates an erratum on LEON processors, so by disabling this instruction
// this will be promoted to use FSQRTD with doubles instead.
let Predicates = [HasNoFdivSqrtFix] in
def FSQRTS : F3_3u<2, 0b110100, 0b000101001, def FSQRTS : F3_3u<2, 0b110100, 0b000101001,
(outs FPRegs:$rd), (ins FPRegs:$rs2), (outs FPRegs:$rd), (ins FPRegs:$rs2),
"fsqrts $rs2, $rd", "fsqrts $rs2, $rd",
@ -1225,6 +1232,9 @@ def FSUBQ : F3_3<2, 0b110100, 0b001000111,
// Floating-point Multiply and Divide Instructions, p. 147 // Floating-point Multiply and Divide Instructions, p. 147
// FMULS generates an erratum on LEON processors, so by disabling this instruction
// this will be promoted to use FMULD with doubles instead.
let Predicates = [HasNoFmulsFix] in
def FMULS : F3_3<2, 0b110100, 0b001001001, def FMULS : F3_3<2, 0b110100, 0b001001001,
(outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2), (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
"fmuls $rs1, $rs2, $rd", "fmuls $rs1, $rs2, $rd",
@ -1241,6 +1251,7 @@ def FMULQ : F3_3<2, 0b110100, 0b001001011,
[(set f128:$rd, (fmul f128:$rs1, f128:$rs2))]>, [(set f128:$rd, (fmul f128:$rs1, f128:$rs2))]>,
Requires<[HasHardQuad]>; Requires<[HasHardQuad]>;
let Predicates = [HasNoFsmuldFix] in
def FSMULD : F3_3<2, 0b110100, 0b001101001, def FSMULD : F3_3<2, 0b110100, 0b001101001,
(outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2), (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
"fsmuld $rs1, $rs2, $rd", "fsmuld $rs1, $rs2, $rd",
@ -1254,6 +1265,8 @@ def FDMULQ : F3_3<2, 0b110100, 0b001101110,
(fextend f64:$rs2)))]>, (fextend f64:$rs2)))]>,
Requires<[HasHardQuad]>; Requires<[HasHardQuad]>;
// FDIVS generates an erratum on LEON processors, so by disabling this instruction
// this will be promoted to use FDIVD with doubles instead.
def FDIVS : F3_3<2, 0b110100, 0b001001101, def FDIVS : F3_3<2, 0b110100, 0b001001101,
(outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2), (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
"fdivs $rs1, $rs2, $rd", "fdivs $rs1, $rs2, $rd",

View File

@ -40,6 +40,9 @@ SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
HasLeonCasa = false; HasLeonCasa = false;
HasUmacSmac = false; HasUmacSmac = false;
InsertNOPLoad = false; InsertNOPLoad = false;
FixFSMULD = false;
ReplaceFMULS = false;
FixAllFDIVSQRT = false;
// Determine default and user specified characteristics // Determine default and user specified characteristics
std::string CPUName = CPU; std::string CPUName = CPU;

View File

@ -45,6 +45,9 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
bool HasUmacSmac; bool HasUmacSmac;
bool HasLeonCasa; bool HasLeonCasa;
bool InsertNOPLoad; bool InsertNOPLoad;
bool FixFSMULD;
bool ReplaceFMULS;
bool FixAllFDIVSQRT;
SparcInstrInfo InstrInfo; SparcInstrInfo InstrInfo;
SparcTargetLowering TLInfo; SparcTargetLowering TLInfo;
@ -85,6 +88,9 @@ public:
bool hasUmacSmac() const { return HasUmacSmac; } bool hasUmacSmac() const { return HasUmacSmac; }
bool hasLeonCasa() const { return HasLeonCasa; } bool hasLeonCasa() const { return HasLeonCasa; }
bool insertNOPLoad() const { return InsertNOPLoad; } bool insertNOPLoad() const { return InsertNOPLoad; }
bool fixFSMULD() const { return FixFSMULD; }
bool replaceFMULS() const { return ReplaceFMULS; }
bool fixAllFDIVSQRT() const { return FixAllFDIVSQRT; }
/// ParseSubtargetFeatures - Parses features string setting specified /// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen. /// subtarget options. Definition of function is auto generated by tblgen.

View File

@ -149,6 +149,18 @@ void SparcPassConfig::addPreEmitPass(){
{ {
addPass(new InsertNOPLoad(getSparcTargetMachine())); addPass(new InsertNOPLoad(getSparcTargetMachine()));
} }
if (this->getSparcTargetMachine().getSubtargetImpl()->fixFSMULD())
{
addPass(new FixFSMULD(getSparcTargetMachine()));
}
if (this->getSparcTargetMachine().getSubtargetImpl()->replaceFMULS())
{
addPass(new ReplaceFMULS(getSparcTargetMachine()));
}
if (this->getSparcTargetMachine().getSubtargetImpl()->fixAllFDIVSQRT())
{
addPass(new FixAllFDIVSQRT(getSparcTargetMachine()));
}
} }
void SparcV8TargetMachine::anchor() { } void SparcV8TargetMachine::anchor() { }

View File

@ -0,0 +1,26 @@
; RUN: llc %s -O0 -march=sparc -mcpu=ut699 -o - | FileCheck %s
; CHECK-LABEL: test_fix_fsmuld_1
; CHECK: fsmuld %f20, %f21, %f8
; Emits an inline-asm fsmuld whose three operands are pinned to %f20, %f21 and
; %f8 by the constraint string, using two stack-allocated floats.
; NOTE(review): the loaded values %0 and %1 are never used; the asm call is
; passed the pointers %a and %b instead.
define double @test_fix_fsmuld_1() {
entry:
%a = alloca float, align 4
%b = alloca float, align 4
store float 0x402ECCCCC0000000, float* %a, align 4
store float 0x4022333340000000, float* %b, align 4
%0 = load float, float* %b, align 4
%1 = load float, float* %a, align 4
%mul = tail call double asm sideeffect "fsmuld $0, $1, $2", "={f20},{f21},{f8}"(float* %a, float* %b)
ret double %mul
}
; CHECK-LABEL: test_fix_fsmuld_2
; CHECK: fsmuld %f20, %f21, %f8
; Same inline-asm fsmuld as above (operands pinned to %f20, %f21 and %f8), but
; with the two pointers supplied as function arguments.
define double @test_fix_fsmuld_2(float* %a, float* %b) {
entry:
%mul = tail call double asm sideeffect "fsmuld $0, $1, $2", "={f20},{f21},{f8}"(float* %a, float* %b)
ret double %mul
}

View File

@ -0,0 +1,16 @@
; RUN: llc %s -O0 -march=sparc -mcpu=ut699 -o - | FileCheck %s
; CHECK-LABEL: fmuls_fix_test
; CHECK: fmuls %f20, %f21, %f8
; Emits an inline-asm fmuls whose three operands are pinned to %f20, %f21 and
; %f8 by the constraint string, using two stack-allocated floats.
; NOTE(review): the loaded values %0 and %1 are never used; the asm call is
; passed the pointers %a and %b instead.
define double @fmuls_fix_test() {
entry:
%a = alloca float, align 4
%b = alloca float, align 4
store float 0x402ECCCCC0000000, float* %a, align 4
store float 0x4022333340000000, float* %b, align 4
%0 = load float, float* %b, align 4
%1 = load float, float* %a, align 4
%mul = tail call double asm sideeffect "fmuls $0, $1, $2", "={f20},{f21},{f8}"(float* %a, float* %b)
ret double %mul
}

View File

@ -0,0 +1,20 @@
; RUN: llc %s -O0 -march=sparc -mcpu=leon2 -o - | FileCheck %s
; RUN: llc %s -O0 -march=sparc -mcpu=leon3 -o - | FileCheck %s
; RUN: llc %s -O0 -march=sparc -mcpu=leon4 -o - | FileCheck %s
; CHECK-LABEL: smac_test:
; CHECK: smac %o1, %o0, %o0
; Emits an inline-asm smac with compiler-chosen registers ("=r,rI,r"); the asm
; template prints its operands in reverse order ($2, $1, $0).
define i32 @smac_test(i16* %a, i16* %b) {
entry:
; Earlier variant with fixed registers, kept for reference:
; %0 = tail call i32 asm sideeffect "smac $2, $1, $0", "={r2},{r3},{r4}"(i16* %a, i16* %b)
%0 = tail call i32 asm sideeffect "smac $2, $1, $0", "=r,rI,r"(i16* %a, i16* %b)
ret i32 %0
}
; CHECK-LABEL: umac_test:
; CHECK: umac %o1, %o0, %o0
; Emits an inline-asm umac with compiler-chosen registers ("=r,rI,r"); the asm
; template prints its operands in reverse order ($2, $1, $0).
define i32 @umac_test(i16* %a, i16* %b) {
entry:
%0 = tail call i32 asm sideeffect "umac $2, $1, $0", "=r,rI,r"(i16* %a, i16* %b)
ret i32 %0
}