mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-05 01:56:16 +00:00
This patch closes PR#32216: Better testing of schedule model instruction latencies/throughputs.
The details are here: https://reviews.llvm.org/D30941 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300311 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e92557c930
commit
3796561c6e
@ -112,6 +112,9 @@ public:
|
||||
typedef std::pair<const GlobalVariable *, unsigned> GOTEquivUsePair;
|
||||
MapVector<const MCSymbol *, GOTEquivUsePair> GlobalGOTEquivs;
|
||||
|
||||
/// Enable print [latency:throughput] in output
|
||||
bool EnablePrintSchedInfo = false;
|
||||
|
||||
private:
|
||||
MCSymbol *CurrentFnBegin = nullptr;
|
||||
MCSymbol *CurrentFnEnd = nullptr;
|
||||
|
@ -189,6 +189,10 @@ public:
|
||||
/// This is typically one cycle.
|
||||
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefIdx,
|
||||
const MachineInstr *DepMI) const;
|
||||
|
||||
/// \brief Compute the reciprocal throughput of the given instruction.
|
||||
Optional<double> computeInstrRThroughput(const MachineInstr *MI) const;
|
||||
Optional<double> computeInstrRThroughput(unsigned Opcode) const;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
@ -98,7 +98,8 @@ public:
|
||||
void EmitSLEB128Value(const MCExpr *Value) override;
|
||||
void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override;
|
||||
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override;
|
||||
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo& STI) override;
|
||||
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
|
||||
bool = false) override;
|
||||
|
||||
/// \brief Emit an instruction to a special fragment, because this instruction
|
||||
/// can change its size during relaxation.
|
||||
|
@ -836,7 +836,9 @@ public:
|
||||
}
|
||||
|
||||
/// \brief Emit the given \p Instruction into the current section.
|
||||
virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
|
||||
/// If PrintSchedInfo is true then a scheduling comment should be added to the output.
|
||||
virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
|
||||
bool PrintSchedInfo = false);
|
||||
|
||||
/// \brief Set the bundle alignment mode from now on in the section.
|
||||
/// The argument is the power of 2 to which the alignment is set. The
|
||||
|
@ -26,6 +26,8 @@
|
||||
#include <string>
|
||||
|
||||
namespace llvm {
|
||||
class MachineInstr;
|
||||
class MCInst;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
@ -167,6 +169,15 @@ public:
|
||||
auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU);
|
||||
return Found != ProcDesc.end() && StringRef(Found->Key) == CPU;
|
||||
}
|
||||
|
||||
/// Returns string representation of scheduler comment
|
||||
virtual std::string getSchedInfoStr(const MachineInstr &MI) const {
|
||||
return std::string();
|
||||
}
|
||||
|
||||
/// Returns the string representation of the scheduler comment for \p MCI.
/// This default implementation has no scheduling data to report and yields
/// an empty string.
virtual std::string getSchedInfoStr(MCInst const &MCI) const {
  return {};
}
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "llvm/CodeGen/PBQPRAConstraint.h"
|
||||
#include "llvm/CodeGen/SchedulerRegistry.h"
|
||||
#include "llvm/CodeGen/ScheduleDAGMutation.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/Support/CodeGen.h"
|
||||
#include <memory>
|
||||
@ -143,6 +144,9 @@ public:
|
||||
/// TargetLowering preference). It does not yet disable the postRA scheduler.
|
||||
virtual bool enableMachineScheduler() const;
|
||||
|
||||
/// \brief Support printing of [latency:throughput] comment in output .S file.
|
||||
virtual bool supportPrintSchedInfo() const { return false; }
|
||||
|
||||
/// \brief True if the machine scheduler should disable the TLI preference
|
||||
/// for preRA scheduling with the source level scheduler.
|
||||
virtual bool enableMachineSchedDefaultSched() const { return true; }
|
||||
@ -227,6 +231,10 @@ public:
|
||||
/// Please use MachineRegisterInfo::subRegLivenessEnabled() instead where
|
||||
/// possible.
|
||||
virtual bool enableSubRegLiveness() const { return false; }
|
||||
|
||||
/// Returns string representation of scheduler comment
|
||||
std::string getSchedInfoStr(const MachineInstr &MI) const override;
|
||||
std::string getSchedInfoStr(MCInst const &MCI) const override;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
@ -123,6 +123,10 @@ static const char *const CodeViewLineTablesGroupDescription =
|
||||
|
||||
STATISTIC(EmittedInsts, "Number of machine instrs printed");
|
||||
|
||||
static cl::opt<bool>
|
||||
PrintSchedule("print-schedule", cl::Hidden, cl::init(false),
|
||||
cl::desc("Print 'sched: [latency:throughput]' in .s output"));
|
||||
|
||||
char AsmPrinter::ID = 0;
|
||||
|
||||
typedef DenseMap<GCStrategy*, std::unique_ptr<GCMetadataPrinter>> gcp_map_type;
|
||||
@ -720,7 +724,8 @@ void AsmPrinter::EmitFunctionEntryLabel() {
|
||||
}
|
||||
|
||||
/// emitComments - Pretty-print comments for instructions.
|
||||
static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
|
||||
static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
|
||||
AsmPrinter *AP) {
|
||||
const MachineFunction *MF = MI.getParent()->getParent();
|
||||
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
|
||||
|
||||
@ -728,6 +733,7 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
|
||||
int FI;
|
||||
|
||||
const MachineFrameInfo &MFI = MF->getFrameInfo();
|
||||
bool Commented = false;
|
||||
|
||||
// We assume a single instruction only has a spill or reload, not
|
||||
// both.
|
||||
@ -735,24 +741,39 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
|
||||
if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
|
||||
if (MFI.isSpillSlotObjectIndex(FI)) {
|
||||
MMO = *MI.memoperands_begin();
|
||||
CommentOS << MMO->getSize() << "-byte Reload\n";
|
||||
CommentOS << MMO->getSize() << "-byte Reload";
|
||||
Commented = true;
|
||||
}
|
||||
} else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) {
|
||||
if (MFI.isSpillSlotObjectIndex(FI))
|
||||
CommentOS << MMO->getSize() << "-byte Folded Reload\n";
|
||||
if (MFI.isSpillSlotObjectIndex(FI)) {
|
||||
CommentOS << MMO->getSize() << "-byte Folded Reload";
|
||||
Commented = true;
|
||||
}
|
||||
} else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
|
||||
if (MFI.isSpillSlotObjectIndex(FI)) {
|
||||
MMO = *MI.memoperands_begin();
|
||||
CommentOS << MMO->getSize() << "-byte Spill\n";
|
||||
CommentOS << MMO->getSize() << "-byte Spill";
|
||||
Commented = true;
|
||||
}
|
||||
} else if (TII->hasStoreToStackSlot(MI, MMO, FI)) {
|
||||
if (MFI.isSpillSlotObjectIndex(FI))
|
||||
CommentOS << MMO->getSize() << "-byte Folded Spill\n";
|
||||
if (MFI.isSpillSlotObjectIndex(FI)) {
|
||||
CommentOS << MMO->getSize() << "-byte Folded Spill";
|
||||
Commented = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for spill-induced copies
|
||||
if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
|
||||
CommentOS << " Reload Reuse\n";
|
||||
if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) {
|
||||
Commented = true;
|
||||
CommentOS << " Reload Reuse";
|
||||
}
|
||||
|
||||
if (Commented && AP->EnablePrintSchedInfo)
|
||||
// If any comment was added above and we need sched info comment then
|
||||
// add this new comment just after the above comment w/o "\n" between them.
|
||||
CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n";
|
||||
else if (Commented)
|
||||
CommentOS << "\n";
|
||||
}
|
||||
|
||||
/// emitImplicitDef - This method emits the specified machine instruction
|
||||
@ -966,7 +987,7 @@ void AsmPrinter::EmitFunctionBody() {
|
||||
}
|
||||
|
||||
if (isVerbose())
|
||||
emitComments(MI, OutStreamer->GetCommentOS());
|
||||
emitComments(MI, OutStreamer->GetCommentOS(), this);
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
case TargetOpcode::CFI_INSTRUCTION:
|
||||
@ -1383,6 +1404,11 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
|
||||
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
|
||||
if (isVerbose())
|
||||
LI = &getAnalysis<MachineLoopInfo>();
|
||||
|
||||
const TargetSubtargetInfo &STI = MF.getSubtarget();
|
||||
EnablePrintSchedInfo = PrintSchedule.getNumOccurrences()
|
||||
? PrintSchedule
|
||||
: STI.supportPrintSchedInfo();
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
@ -277,7 +277,11 @@ unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
|
||||
if (SCDesc->isValid() && !SCDesc->isVariant())
|
||||
return computeInstrLatency(*SCDesc);
|
||||
|
||||
llvm_unreachable("No MI sched latency");
|
||||
if (SCDesc->isValid()) {
|
||||
assert (!SCDesc->isVariant() && "No MI sched latency: SCDesc->isVariant()");
|
||||
return computeInstrLatency(*SCDesc);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned
|
||||
@ -331,3 +335,68 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Compute the reciprocal throughput of scheduling class \p schedClass from
/// the itinerary stages described by \p IID.
///
/// Every stage with a non-zero cycle count contributes a throughput of
/// (number of functional units) / (cycles); the smallest such value bounds the
/// instruction. Stages with zero cycles are ignored.
///
/// \returns the reciprocal of the bounding throughput, or an empty Optional
/// when no stage carries a cycle count (previously this case produced 0.0,
/// i.e. 1 / infinity, which misreported "unknown" as a real value).
static Optional<double>
getRTroughputFromItineraries(unsigned schedClass,
                             const InstrItineraryData *IID) {
  Optional<double> Throughput;

  for (const InstrStage *IS = IID->beginStage(schedClass),
                        *E = IID->endStage(schedClass);
       IS != E; ++IS) {
    unsigned Cycles = IS->getCycles();
    if (!Cycles)
      continue;
    double StageThroughput = countPopulation(IS->getUnits()) * 1.0 / Cycles;
    Throughput = Throughput.hasValue()
                     ? std::min(Throughput.getValue(), StageThroughput)
                     : StageThroughput;
  }

  // Nothing measurable was found: report "unknown" instead of 0.
  if (!Throughput.hasValue())
    return Optional<double>();
  // We need reciprocal throughput that's why we return such value.
  return 1 / Throughput.getValue();
}
|
||||
|
||||
/// Compute the reciprocal throughput of \p SCDesc from the write
/// processor-resource entries that \p STI reports for it.
///
/// Each entry contributes a throughput of (NumUnits of the resource looked up
/// in \p SchedModel) / (cycles the resource is held); the smallest such value
/// bounds the instruction.
///
/// \returns the reciprocal of the bounding throughput, or an empty Optional
/// when any entry has a zero cycle count or when there are no entries at all
/// (previously the latter produced 0.0, i.e. 1 / infinity).
static Optional<double>
getRTroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc,
                                 const TargetSubtargetInfo *STI,
                                 const MCSchedModel &SchedModel) {
  Optional<double> Throughput;

  for (const MCWriteProcResEntry *WPR = STI->getWriteProcResBegin(SCDesc),
                                 *WEnd = STI->getWriteProcResEnd(SCDesc);
       WPR != WEnd; ++WPR) {
    unsigned Cycles = WPR->Cycles;
    // A zero cycle count gives no usable rate; treat the result as unknown.
    if (!Cycles)
      return Optional<double>();

    unsigned NumUnits =
        SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits;
    double ResourceThroughput = NumUnits * 1.0 / Cycles;
    Throughput = Throughput.hasValue()
                     ? std::min(Throughput.getValue(), ResourceThroughput)
                     : ResourceThroughput;
  }

  // No resource entries at all: report "unknown" instead of 0.
  if (!Throughput.hasValue())
    return Optional<double>();
  // We need reciprocal throughput that's why we return such value.
  return 1 / Throughput.getValue();
}
|
||||
|
||||
/// Compute the reciprocal throughput of \p MI.
///
/// Instruction itineraries are consulted first when available; otherwise the
/// machine scheduling model is used. An empty Optional is returned when
/// neither source of information exists.
Optional<double>
TargetSchedModel::computeInstrRThroughput(const MachineInstr *MI) const {
  if (hasInstrItineraries()) {
    unsigned SchedClass = MI->getDesc().getSchedClass();
    return getRTroughputFromItineraries(SchedClass, getInstrItineraries());
  }

  if (!hasInstrSchedModel())
    return Optional<double>();

  const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
  return getRTroughputFromInstrSchedModel(SCDesc, STI, SchedModel);
}
|
||||
|
||||
/// Compute the reciprocal throughput of the instruction with opcode
/// \p Opcode.
///
/// Instruction itineraries are consulted first when available. With a machine
/// scheduling model, only valid, non-variant scheduling classes can be
/// answered from the opcode alone; every other case yields an empty Optional.
Optional<double>
TargetSchedModel::computeInstrRThroughput(unsigned Opcode) const {
  unsigned SchedClass = TII->get(Opcode).getSchedClass();

  if (hasInstrItineraries())
    return getRTroughputFromItineraries(SchedClass, getInstrItineraries());

  if (!hasInstrSchedModel())
    return Optional<double>();

  const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
  if (!SCDesc->isValid() || SCDesc->isVariant())
    return Optional<double>();

  return getRTroughputFromInstrSchedModel(SCDesc, STI, SchedModel);
}
|
||||
|
@ -11,6 +11,9 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/TargetSchedule.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
using namespace llvm;
|
||||
|
||||
@ -52,3 +55,46 @@ bool TargetSubtargetInfo::enablePostRAScheduler() const {
|
||||
bool TargetSubtargetInfo::useAA() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
static std::string createSchedInfoStr(unsigned Latency,
|
||||
Optional<double> RThroughput) {
|
||||
static const char *SchedPrefix = " sched: [";
|
||||
std::string Comment;
|
||||
raw_string_ostream CS(Comment);
|
||||
if (Latency > 0 && RThroughput.hasValue())
|
||||
CS << SchedPrefix << Latency << format(":%2.2f", RThroughput.getValue())
|
||||
<< "]";
|
||||
else if (Latency > 0)
|
||||
CS << SchedPrefix << Latency << ":?]";
|
||||
else if (RThroughput.hasValue())
|
||||
CS << SchedPrefix << "?:" << RThroughput.getValue() << "]";
|
||||
CS.flush();
|
||||
return Comment;
|
||||
}
|
||||
|
||||
/// Returns string representation of scheduler comment
|
||||
std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const {
|
||||
if (MI.isPseudo() || MI.isTerminator())
|
||||
return std::string();
|
||||
// We don't cache TSchedModel because it depends on TargetInstrInfo
|
||||
// that could be changed during the compilation
|
||||
TargetSchedModel TSchedModel;
|
||||
TSchedModel.init(getSchedModel(), this, getInstrInfo());
|
||||
unsigned Latency = TSchedModel.computeInstrLatency(&MI);
|
||||
Optional<double> RThroughput = TSchedModel.computeInstrRThroughput(&MI);
|
||||
return createSchedInfoStr(Latency, RThroughput);
|
||||
}
|
||||
|
||||
/// Returns string representation of scheduler comment
|
||||
std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {
|
||||
// We don't cache TSchedModel because it depends on TargetInstrInfo
|
||||
// that could be changed during the compilation
|
||||
TargetSchedModel TSchedModel;
|
||||
TSchedModel.init(getSchedModel(), this, getInstrInfo());
|
||||
if (!TSchedModel.hasInstrSchedModel())
|
||||
return std::string();
|
||||
unsigned Latency = TSchedModel.computeInstrLatency(MCI.getOpcode());
|
||||
Optional<double> RThroughput =
|
||||
TSchedModel.computeInstrRThroughput(MCI.getOpcode());
|
||||
return createSchedInfoStr(Latency, RThroughput);
|
||||
}
|
||||
|
@ -103,7 +103,10 @@ public:
|
||||
void AddComment(const Twine &T, bool EOL = true) override;
|
||||
|
||||
/// AddEncodingComment - Add a comment showing the encoding of an instruction.
|
||||
void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &);
|
||||
/// If PrintSchedInfo is true then the comment sched:[x:y] should
|
||||
/// be added to the output if it is supported by the target.
|
||||
void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &,
|
||||
bool PrintSchedInfo);
|
||||
|
||||
/// GetCommentOS - Return a raw_ostream that comments can be written to.
|
||||
/// Unlike AddComment, you are required to terminate comments with \n if you
|
||||
@ -278,7 +281,8 @@ public:
|
||||
void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except) override;
|
||||
void EmitWinEHHandlerData() override;
|
||||
|
||||
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
|
||||
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
|
||||
bool PrintSchedInfo) override;
|
||||
|
||||
void EmitBundleAlignMode(unsigned AlignPow2) override;
|
||||
void EmitBundleLock(bool AlignToEnd) override;
|
||||
@ -1504,7 +1508,8 @@ void MCAsmStreamer::EmitWinCFIEndProlog() {
|
||||
}
|
||||
|
||||
void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
|
||||
const MCSubtargetInfo &STI) {
|
||||
const MCSubtargetInfo &STI,
|
||||
bool PrintSchedInfo) {
|
||||
raw_ostream &OS = GetCommentOS();
|
||||
SmallString<256> Code;
|
||||
SmallVector<MCFixup, 4> Fixups;
|
||||
@ -1577,7 +1582,11 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
|
||||
}
|
||||
}
|
||||
}
|
||||
OS << "]\n";
|
||||
OS << "]";
|
||||
// If we are not going to add fixup or scheduling comments after this point then
|
||||
// we have to end the current comment line with "\n".
|
||||
if (Fixups.size() || !PrintSchedInfo)
|
||||
OS << "\n";
|
||||
|
||||
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
|
||||
MCFixup &F = Fixups[i];
|
||||
@ -1588,16 +1597,19 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
|
||||
}
|
||||
|
||||
void MCAsmStreamer::EmitInstruction(const MCInst &Inst,
|
||||
const MCSubtargetInfo &STI) {
|
||||
const MCSubtargetInfo &STI,
|
||||
bool PrintSchedInfo) {
|
||||
assert(getCurrentSectionOnly() &&
|
||||
"Cannot emit contents before setting section!");
|
||||
|
||||
// Show the encoding in a comment if we have a code emitter.
|
||||
if (Emitter)
|
||||
AddEncodingComment(Inst, STI);
|
||||
AddEncodingComment(Inst, STI, PrintSchedInfo);
|
||||
|
||||
// Show the MCInst if enabled.
|
||||
if (ShowInst) {
|
||||
if (PrintSchedInfo)
|
||||
GetCommentOS() << "\n";
|
||||
Inst.dump_pretty(GetCommentOS(), InstPrinter.get(), "\n ");
|
||||
GetCommentOS() << "\n";
|
||||
}
|
||||
@ -1607,6 +1619,16 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst,
|
||||
else
|
||||
InstPrinter->printInst(&Inst, OS, "", STI);
|
||||
|
||||
if (PrintSchedInfo) {
|
||||
std::string SI = STI.getSchedInfoStr(Inst);
|
||||
if (!SI.empty())
|
||||
GetCommentOS() << SI;
|
||||
}
|
||||
|
||||
StringRef Comments = CommentToEmit;
|
||||
if (Comments.size() && Comments.back() != '\n')
|
||||
GetCommentOS() << "\n";
|
||||
|
||||
EmitEOL();
|
||||
}
|
||||
|
||||
|
@ -238,7 +238,7 @@ bool MCObjectStreamer::mayHaveInstructions(MCSection &Sec) const {
|
||||
}
|
||||
|
||||
void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
|
||||
const MCSubtargetInfo &STI) {
|
||||
const MCSubtargetInfo &STI, bool) {
|
||||
MCStreamer::EmitInstruction(Inst, STI);
|
||||
|
||||
MCSection *Sec = getCurrentSectionOnly();
|
||||
|
@ -777,8 +777,8 @@ void MCStreamer::visitUsedExpr(const MCExpr &Expr) {
|
||||
}
|
||||
}
|
||||
|
||||
void MCStreamer::EmitInstruction(const MCInst &Inst,
|
||||
const MCSubtargetInfo &STI) {
|
||||
void MCStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
|
||||
bool) {
|
||||
// Scan for values.
|
||||
for (unsigned i = Inst.getNumOperands(); i--;)
|
||||
if (Inst.getOperand(i).isExpr())
|
||||
|
@ -78,7 +78,7 @@ RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); }
|
||||
RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {}
|
||||
|
||||
void RecordStreamer::EmitInstruction(const MCInst &Inst,
|
||||
const MCSubtargetInfo &STI) {
|
||||
const MCSubtargetInfo &STI, bool) {
|
||||
MCStreamer::EmitInstruction(Inst, STI);
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,8 @@ public:
|
||||
const_iterator begin();
|
||||
const_iterator end();
|
||||
RecordStreamer(MCContext &Context);
|
||||
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
|
||||
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
|
||||
bool) override;
|
||||
void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
|
||||
void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
|
||||
bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
|
||||
|
@ -102,8 +102,8 @@ public:
|
||||
/// This function is the one used to emit instruction data into the ELF
|
||||
/// streamer. We override it to add the appropriate mapping symbol if
|
||||
/// necessary.
|
||||
void EmitInstruction(const MCInst &Inst,
|
||||
const MCSubtargetInfo &STI) override {
|
||||
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
|
||||
bool) override {
|
||||
EmitA64MappingSymbol();
|
||||
MCELFStreamer::EmitInstruction(Inst, STI);
|
||||
}
|
||||
|
@ -477,8 +477,8 @@ public:
|
||||
/// This function is the one used to emit instruction data into the ELF
|
||||
/// streamer. We override it to add the appropriate mapping symbol if
|
||||
/// necessary.
|
||||
void EmitInstruction(const MCInst& Inst,
|
||||
const MCSubtargetInfo &STI) override {
|
||||
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
|
||||
bool) override {
|
||||
if (IsThumb)
|
||||
EmitThumbMappingSymbol();
|
||||
else
|
||||
|
@ -44,7 +44,7 @@ static cl::opt<unsigned> GPSize
|
||||
cl::init(8));
|
||||
|
||||
void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB,
|
||||
const MCSubtargetInfo &STI) {
|
||||
const MCSubtargetInfo &STI, bool) {
|
||||
assert(MCB.getOpcode() == Hexagon::BUNDLE);
|
||||
assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE);
|
||||
assert(HexagonMCInstrInfo::bundleSize(MCB) > 0);
|
||||
|
@ -34,7 +34,8 @@ public:
|
||||
MCELFStreamer(Context, TAB, OS, Emitter),
|
||||
MCII (createHexagonMCInstrInfo()) {}
|
||||
|
||||
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
|
||||
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
|
||||
bool) override;
|
||||
void EmitSymbol(const MCInst &Inst);
|
||||
void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
|
||||
unsigned ByteAlignment,
|
||||
|
@ -20,7 +20,7 @@
|
||||
using namespace llvm;
|
||||
|
||||
void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
|
||||
const MCSubtargetInfo &STI) {
|
||||
const MCSubtargetInfo &STI, bool) {
|
||||
MCELFStreamer::EmitInstruction(Inst, STI);
|
||||
|
||||
MCContext &Context = getContext();
|
||||
|
@ -45,7 +45,8 @@ public:
|
||||
/// \p Inst is actually emitted. For example, we can inspect the operands and
|
||||
/// gather sufficient information that allows us to reason about the register
|
||||
/// usage for the translation unit.
|
||||
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
|
||||
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
|
||||
bool = false) override;
|
||||
|
||||
/// Overriding this function allows us to record all labels that should be
|
||||
/// marked as microMIPS. Based on this data marking is done in
|
||||
|
@ -139,8 +139,8 @@ private:
|
||||
public:
|
||||
/// This function is the one used to emit instruction data into the ELF
|
||||
/// streamer. We override it to mask dangerous instructions.
|
||||
void EmitInstruction(const MCInst &Inst,
|
||||
const MCSubtargetInfo &STI) override {
|
||||
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
|
||||
bool) override {
|
||||
// Sandbox indirect jumps.
|
||||
if (isIndirectJump(Inst)) {
|
||||
if (PendingCall)
|
||||
|
@ -1189,8 +1189,6 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
|
||||
OS << ']';
|
||||
--i; // For loop increments element #.
|
||||
}
|
||||
//MI->print(OS, 0);
|
||||
OS << "\n";
|
||||
|
||||
// We successfully added a comment to this instruction.
|
||||
return true;
|
||||
|
@ -102,7 +102,7 @@ void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
|
||||
}
|
||||
|
||||
void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
|
||||
OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
|
||||
OutStreamer->EmitInstruction(Inst, getSubtargetInfo(), EnablePrintSchedInfo);
|
||||
SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
|
||||
}
|
||||
|
||||
@ -1529,7 +1529,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
SmallVector<int, 64> Mask;
|
||||
DecodePSHUFBMask(C, Mask);
|
||||
if (!Mask.empty())
|
||||
OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
|
||||
OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
|
||||
!EnablePrintSchedInfo);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -1600,7 +1601,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
SmallVector<int, 16> Mask;
|
||||
DecodeVPERMILPMask(C, ElSize, Mask);
|
||||
if (!Mask.empty())
|
||||
OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
|
||||
OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
|
||||
!EnablePrintSchedInfo);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -1630,7 +1632,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
SmallVector<int, 16> Mask;
|
||||
DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask);
|
||||
if (!Mask.empty())
|
||||
OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
|
||||
OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
|
||||
!EnablePrintSchedInfo);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -1646,7 +1649,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
SmallVector<int, 16> Mask;
|
||||
DecodeVPPERMMask(C, Mask);
|
||||
if (!Mask.empty())
|
||||
OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
|
||||
OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
|
||||
!EnablePrintSchedInfo);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -1706,7 +1710,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
CS << "?";
|
||||
}
|
||||
CS << "]";
|
||||
OutStreamer->AddComment(CS.str());
|
||||
OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
|
||||
} else if (auto *CV = dyn_cast<ConstantVector>(C)) {
|
||||
CS << "<";
|
||||
for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
|
||||
@ -1738,7 +1742,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
}
|
||||
}
|
||||
CS << ">";
|
||||
OutStreamer->AddComment(CS.str());
|
||||
OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -624,6 +624,9 @@ public:
|
||||
/// Enable the MachineScheduler pass for all X86 subtargets.
|
||||
bool enableMachineScheduler() const override { return true; }
|
||||
|
||||
// TODO: Update the regression tests and return true.
|
||||
bool supportPrintSchedInfo() const override { return false; }
|
||||
|
||||
bool enableEarlyIfConversion() const override;
|
||||
|
||||
/// Return the instruction itineraries based on the subtarget selection.
|
||||
|
@ -2,12 +2,12 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge| FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL-NO-FMA
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX
|
||||
|
||||
; If the target's divss/divps instructions are substantially
|
||||
; slower than rcpss/rcpps with a Newton-Raphson refinement,
|
||||
@ -25,11 +25,47 @@ define float @f32_no_estimate(float %x) #0 {
|
||||
; SSE-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: f32_no_estimate:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
; AVX-RECIP-LABEL: f32_no_estimate:
|
||||
; AVX-RECIP: # BB#0:
|
||||
; AVX-RECIP-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-RECIP-NEXT: vdivss %xmm0, %xmm1, %xmm0
|
||||
; AVX-RECIP-NEXT: retq
|
||||
;
|
||||
; FMA-RECIP-LABEL: f32_no_estimate:
|
||||
; FMA-RECIP: # BB#0:
|
||||
; FMA-RECIP-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; FMA-RECIP-NEXT: vdivss %xmm0, %xmm1, %xmm0
|
||||
; FMA-RECIP-NEXT: retq
|
||||
;
|
||||
; BTVER2-LABEL: f32_no_estimate:
|
||||
; BTVER2: # BB#0:
|
||||
; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
|
||||
; BTVER2-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [19:19.00]
|
||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; SANDY-LABEL: f32_no_estimate:
|
||||
; SANDY: # BB#0:
|
||||
; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
|
||||
; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
|
||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
||||
;
|
||||
; HASWELL-LABEL: f32_no_estimate:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
|
||||
; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
|
||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; HASWELL-NO-FMA-LABEL: f32_no_estimate:
|
||||
; HASWELL-NO-FMA: # BB#0:
|
||||
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; HASWELL-NO-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
|
||||
; HASWELL-NO-FMA-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: f32_no_estimate:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
|
||||
; AVX512-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
|
||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
||||
%div = fdiv fast float 1.0, %x
|
||||
ret float %div
|
||||
}
|
||||
@ -65,30 +101,30 @@ define float @f32_one_step(float %x) #1 {
|
||||
;
|
||||
; BTVER2-LABEL: f32_one_step:
|
||||
; BTVER2: # BB#0:
|
||||
; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
|
||||
; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
|
||||
; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0
|
||||
; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
|
||||
; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
|
||||
; BTVER2-NEXT: retq
|
||||
; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
|
||||
; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; SANDY-LABEL: f32_one_step:
|
||||
; SANDY: # BB#0:
|
||||
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
|
||||
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
|
||||
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0
|
||||
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
|
||||
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
|
||||
; SANDY-NEXT: retq
|
||||
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
|
||||
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
||||
;
|
||||
; HASWELL-LABEL: f32_one_step:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
|
||||
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
|
||||
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
|
||||
; HASWELL-NEXT: retq
|
||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; HASWELL-NO-FMA-LABEL: f32_one_step:
|
||||
; HASWELL-NO-FMA: # BB#0:
|
||||
@ -105,7 +141,7 @@ define float @f32_one_step(float %x) #1 {
|
||||
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
|
||||
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
||||
%div = fdiv fast float 1.0, %x
|
||||
ret float %div
|
||||
}
|
||||
@ -155,42 +191,42 @@ define float @f32_two_step(float %x) #2 {
|
||||
;
|
||||
; BTVER2-LABEL: f32_two_step:
|
||||
; BTVER2: # BB#0:
|
||||
; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
|
||||
; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2
|
||||
; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2
|
||||
; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2
|
||||
; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1
|
||||
; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
|
||||
; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0
|
||||
; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
|
||||
; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
|
||||
; BTVER2-NEXT: retq
|
||||
; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00]
|
||||
; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; SANDY-LABEL: f32_two_step:
|
||||
; SANDY: # BB#0:
|
||||
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
|
||||
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2
|
||||
; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2
|
||||
; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2
|
||||
; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1
|
||||
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
|
||||
; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0
|
||||
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
|
||||
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
|
||||
; SANDY-NEXT: retq
|
||||
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50]
|
||||
; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
||||
; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
||||
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
||||
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
||||
;
|
||||
; HASWELL-LABEL: f32_two_step:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
|
||||
; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; HASWELL-NEXT: vmovaps %xmm1, %xmm3
|
||||
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
|
||||
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3
|
||||
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3
|
||||
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0
|
||||
; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0
|
||||
; HASWELL-NEXT: retq
|
||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; HASWELL-NO-FMA-LABEL: f32_two_step:
|
||||
; HASWELL-NO-FMA: # BB#0:
|
||||
@ -209,13 +245,13 @@ define float @f32_two_step(float %x) #2 {
|
||||
; AVX512-LABEL: f32_two_step:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vmovaps %xmm1, %xmm3
|
||||
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
|
||||
; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3
|
||||
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3
|
||||
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0
|
||||
; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
||||
%div = fdiv fast float 1.0, %x
|
||||
ret float %div
|
||||
}
|
||||
@ -242,21 +278,21 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
|
||||
;
|
||||
; BTVER2-LABEL: v4f32_no_estimate:
|
||||
; BTVER2: # BB#0:
|
||||
; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; BTVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0
|
||||
; BTVER2-NEXT: retq
|
||||
; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
||||
; BTVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [19:19.00]
|
||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; SANDY-LABEL: v4f32_no_estimate:
|
||||
; SANDY: # BB#0:
|
||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0
|
||||
; SANDY-NEXT: retq
|
||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
||||
; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
|
||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
||||
;
|
||||
; HASWELL-LABEL: v4f32_no_estimate:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
|
||||
; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0
|
||||
; HASWELL-NEXT: retq
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50]
|
||||
; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
|
||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; HASWELL-NO-FMA-LABEL: v4f32_no_estimate:
|
||||
; HASWELL-NO-FMA: # BB#0:
|
||||
@ -266,9 +302,9 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
|
||||
;
|
||||
; AVX512-LABEL: v4f32_no_estimate:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
|
||||
; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50]
|
||||
; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
|
||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
||||
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||
ret <4 x float> %div
|
||||
}
|
||||
@ -304,31 +340,31 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
|
||||
;
|
||||
; BTVER2-LABEL: v4f32_one_step:
|
||||
; BTVER2: # BB#0:
|
||||
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; BTVER2-NEXT: vrcpps %xmm0, %xmm1
|
||||
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||
; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
|
||||
; BTVER2-NEXT: retq
|
||||
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
||||
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; SANDY-LABEL: v4f32_one_step:
|
||||
; SANDY: # BB#0:
|
||||
; SANDY-NEXT: vrcpps %xmm0, %xmm1
|
||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
|
||||
; SANDY-NEXT: retq
|
||||
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
||||
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
||||
;
|
||||
; HASWELL-LABEL: v4f32_one_step:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpps %xmm0, %xmm1
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
||||
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
|
||||
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
||||
; HASWELL-NEXT: retq
|
||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; HASWELL-NO-FMA-LABEL: v4f32_one_step:
|
||||
; HASWELL-NO-FMA: # BB#0:
|
||||
@ -342,18 +378,18 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
|
||||
;
|
||||
; KNL-LABEL: v4f32_one_step:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vrcpps %xmm0, %xmm1
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
||||
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
|
||||
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: v4f32_one_step:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
|
||||
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
|
||||
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: retq # sched: [1:1.00]
|
||||
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||
ret <4 x float> %div
|
||||
}
|
||||
@ -403,42 +439,42 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
|
||||
;
|
||||
; BTVER2-LABEL: v4f32_two_step:
|
||||
; BTVER2: # BB#0:
|
||||
; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; BTVER2-NEXT: vrcpps %xmm0, %xmm1
|
||||
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2
|
||||
; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2
|
||||
; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2
|
||||
; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1
|
||||
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0
|
||||
; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
|
||||
; BTVER2-NEXT: retq
|
||||
; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
||||
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; SANDY-LABEL: v4f32_two_step:
|
||||
; SANDY: # BB#0:
|
||||
; SANDY-NEXT: vrcpps %xmm0, %xmm1
|
||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2
|
||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2
|
||||
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2
|
||||
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1
|
||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0
|
||||
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
|
||||
; SANDY-NEXT: retq
|
||||
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
||||
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
||||
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
||||
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
||||
;
|
||||
; HASWELL-LABEL: v4f32_two_step:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpps %xmm0, %xmm1
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
||||
; HASWELL-NEXT: vmovaps %xmm1, %xmm3
|
||||
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
|
||||
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
|
||||
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
|
||||
; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
|
||||
; HASWELL-NEXT: retq
|
||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; HASWELL-NO-FMA-LABEL: v4f32_two_step:
|
||||
; HASWELL-NO-FMA: # BB#0:
|
||||
@ -456,25 +492,25 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
|
||||
;
|
||||
; KNL-LABEL: v4f32_two_step:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vrcpps %xmm0, %xmm1
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
||||
; KNL-NEXT: vmovaps %xmm1, %xmm3
|
||||
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
|
||||
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
|
||||
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
|
||||
; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: v4f32_two_step:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
|
||||
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
||||
; SKX-NEXT: vmovaps %xmm1, %xmm3
|
||||
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
|
||||
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
|
||||
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
|
||||
; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: retq # sched: [1:1.00]
|
||||
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||
ret <4 x float> %div
|
||||
}
|
||||
@ -504,21 +540,21 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
|
||||
;
|
||||
; BTVER2-LABEL: v8f32_no_estimate:
|
||||
; BTVER2: # BB#0:
|
||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
||||
; BTVER2-NEXT: retq
|
||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
||||
; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:19.00]
|
||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; SANDY-LABEL: v8f32_no_estimate:
|
||||
; SANDY: # BB#0:
|
||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
||||
; SANDY-NEXT: retq
|
||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
||||
; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [12:1.00]
|
||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
||||
;
|
||||
; HASWELL-LABEL: v8f32_no_estimate:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
|
||||
; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
||||
; HASWELL-NEXT: retq
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
|
||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; HASWELL-NO-FMA-LABEL: v8f32_no_estimate:
|
||||
; HASWELL-NO-FMA: # BB#0:
|
||||
@ -528,9 +564,9 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
|
||||
;
|
||||
; AVX512-LABEL: v8f32_no_estimate:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
|
||||
; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
|
||||
; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
|
||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
||||
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||
ret <8 x float> %div
|
||||
}
|
||||
@ -573,31 +609,31 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
|
||||
;
|
||||
; BTVER2-LABEL: v8f32_one_step:
|
||||
; BTVER2: # BB#0:
|
||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; BTVER2-NEXT: vrcpps %ymm0, %ymm1
|
||||
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
|
||||
; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0
|
||||
; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
|
||||
; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
|
||||
; BTVER2-NEXT: retq
|
||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
||||
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; SANDY-LABEL: v8f32_one_step:
|
||||
; SANDY: # BB#0:
|
||||
; SANDY-NEXT: vrcpps %ymm0, %ymm1
|
||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
|
||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0
|
||||
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
|
||||
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
|
||||
; SANDY-NEXT: retq
|
||||
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
||||
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
||||
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
||||
;
|
||||
; HASWELL-LABEL: v8f32_one_step:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm1
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
|
||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
||||
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
||||
; HASWELL-NEXT: retq
|
||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; HASWELL-NO-FMA-LABEL: v8f32_one_step:
|
||||
; HASWELL-NO-FMA: # BB#0:
|
||||
@ -611,18 +647,18 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
|
||||
;
|
||||
; KNL-LABEL: v8f32_one_step:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vrcpps %ymm0, %ymm1
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
|
||||
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
||||
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: v8f32_one_step:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
|
||||
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
|
||||
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: retq # sched: [1:1.00]
|
||||
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||
ret <8 x float> %div
|
||||
}
|
||||
@ -685,42 +721,42 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
|
||||
;
|
||||
; BTVER2-LABEL: v8f32_two_step:
|
||||
; BTVER2: # BB#0:
|
||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; BTVER2-NEXT: vrcpps %ymm0, %ymm1
|
||||
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2
|
||||
; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2
|
||||
; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2
|
||||
; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
||||
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
|
||||
; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0
|
||||
; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
|
||||
; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
|
||||
; BTVER2-NEXT: retq
|
||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
||||
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; SANDY-LABEL: v8f32_two_step:
|
||||
; SANDY: # BB#0:
|
||||
; SANDY-NEXT: vrcpps %ymm0, %ymm1
|
||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2
|
||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2
|
||||
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2
|
||||
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
|
||||
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
|
||||
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
|
||||
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
|
||||
; SANDY-NEXT: retq
|
||||
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
||||
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
|
||||
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
|
||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
|
||||
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
||||
;
|
||||
; HASWELL-LABEL: v8f32_two_step:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm1
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
|
||||
; HASWELL-NEXT: vmovaps %ymm1, %ymm3
|
||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
||||
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
|
||||
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
|
||||
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
|
||||
; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
|
||||
; HASWELL-NEXT: retq
|
||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; HASWELL-NO-FMA-LABEL: v8f32_two_step:
|
||||
; HASWELL-NO-FMA: # BB#0:
|
||||
@ -738,25 +774,25 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
|
||||
;
|
||||
; KNL-LABEL: v8f32_two_step:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vrcpps %ymm0, %ymm1
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
|
||||
; KNL-NEXT: vmovaps %ymm1, %ymm3
|
||||
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
||||
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
|
||||
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
|
||||
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
|
||||
; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: v8f32_two_step:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
|
||||
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
|
||||
; SKX-NEXT: vmovaps %ymm1, %ymm3
|
||||
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
||||
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
|
||||
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
|
||||
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
|
||||
; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: retq # sched: [1:1.00]
|
||||
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||
ret <8 x float> %div
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user