mirror of
https://github.com/RPCS3/llvm.git
synced 2025-05-15 01:46:46 +00:00

This patch introduces the following changes to the DispatchStatistics view: (1) DispatchStatistics now reports the number of dispatched opcodes instead of the number of dispatched instructions; (2) the "Dynamic Dispatch Stall Cycles" table now also reports the percentage of stall cycles against the total simulated cycles. This change allows users to easily compare dispatch group sizes with the processor DispatchWidth. Before this change, it was difficult to correlate the two numbers, since the DispatchStatistics view reported numbers of instructions (instead of opcodes). DispatchWidth defines the maximum size of a dispatch group in terms of number of micro opcodes. The other change introduced by this patch is related to how DispatchStage generates "instruction dispatch" events. In particular: (1) there can be multiple dispatch events associated with the same instruction; (2) each dispatch event now encapsulates the number of dispatched micro opcodes. The number of micro opcodes declared by an instruction may exceed the processor DispatchWidth. Therefore, we cannot assume that instructions are always fully dispatched in a single cycle. DispatchStage already knows how to handle instructions declaring a number of opcodes bigger than DispatchWidth. However, DispatchStage always emitted a single instruction dispatch event (during the first simulated dispatch cycle) for instructions dispatched. With this patch, DispatchStage now correctly notifies multiple dispatch events for instructions that cannot be dispatched in a single cycle. A few views had to be modified. Views can no longer assume that there can only be one dispatch event per instruction. Tests (and docs) have been updated. Differential Revision: https://reviews.llvm.org/D51430 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@341055 91177308-0d34-0410-b5e6-96231b3b80d8
88 lines
3.4 KiB
C++
88 lines
3.4 KiB
C++
//===--------------------- SummaryView.cpp -------------------*- C++ -*-===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
/// \file
|
|
///
|
|
/// This file implements the functionalities used by the SummaryView to print
|
|
/// the report information.
|
|
///
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "Views/SummaryView.h"
|
|
#include "Support.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/Support/Format.h"
|
|
|
|
namespace mca {
|
|
|
|
#define DEBUG_TYPE "llvm-mca"
|
|
|
|
using namespace llvm;
|
|
|
|
/// Constructs a summary view for the scheduling model \p Model, the source
/// manager \p S and a dispatch width of \p Width.
///
/// The cycle and micro-opcode counters start at zero. The resource usage and
/// resource mask tables are sized to the number of processor resource kinds
/// reported by \p Model, with every entry initially set to zero.
SummaryView::SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
                         unsigned Width)
    : SM(Model), Source(S), DispatchWidth(Width), TotalCycles(0),
      NumMicroOps(0), ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
      ProcResourceMasks(Model.getNumProcResourceKinds(), 0) {
  // The mask table is populated once, up front; onEvent() later searches it
  // to translate a resource mask back into an index into ProcResourceUsage.
  computeProcResourceMasks(SM, ProcResourceMasks);
}
|
|
|
|
void SummaryView::onEvent(const HWInstructionEvent &Event) {
|
|
// We are only interested in the "instruction retired" events generated by
|
|
// the retire stage for instructions that are part of iteration #0.
|
|
if (Event.Type != HWInstructionEvent::Retired ||
|
|
Event.IR.getSourceIndex() >= Source.size())
|
|
return;
|
|
|
|
// Update the cumulative number of resource cycles based on the processor
|
|
// resource usage information available from the instruction descriptor. We
|
|
// need to compute the cumulative number of resource cycles for every
|
|
// processor resource which is consumed by an instruction of the block.
|
|
const Instruction &Inst = *Event.IR.getInstruction();
|
|
const InstrDesc &Desc = Inst.getDesc();
|
|
NumMicroOps += Desc.NumMicroOps;
|
|
for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) {
|
|
if (RU.second.size()) {
|
|
const auto It = find(ProcResourceMasks, RU.first);
|
|
assert(It != ProcResourceMasks.end() &&
|
|
"Invalid processor resource mask!");
|
|
ProcResourceUsage[std::distance(ProcResourceMasks.begin(), It)] +=
|
|
RU.second.size();
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Prints the summary report to \p OS.
///
/// The report lists the iteration count, the total number of instructions,
/// cycles and micro opcodes, the dispatch width, and three derived figures:
/// micro opcodes per cycle, IPC, and the block reciprocal throughput returned
/// by computeBlockRThroughput().
void SummaryView::printView(raw_ostream &OS) const {
  const unsigned NumIterations = Source.getNumIterations();
  const unsigned BlockSize = Source.size();
  const unsigned InstructionCount = BlockSize * NumIterations;
  const unsigned UOpCount = NumMicroOps * NumIterations;

  const double InstrPerCycle =
      static_cast<double>(InstructionCount) / TotalCycles;
  const double UOpsPerCycle = static_cast<double>(UOpCount) / TotalCycles;
  const double RThroughput = computeBlockRThroughput(
      SM, DispatchWidth, NumMicroOps, ProcResourceUsage);

  // Rounds half-up to the number of decimals implied by Scale (100 -> two
  // decimals, 10 -> one decimal) before the value is handed to format().
  const auto RoundHalfUp = [](double Value, int Scale) {
    return floor((Value * Scale) + 0.5) / Scale;
  };

  // The report is assembled in a local string first and then written to the
  // output stream in a single operation.
  std::string Report;
  raw_string_ostream ReportStream(Report);
  ReportStream << "Iterations: " << NumIterations
               << "\nInstructions: " << InstructionCount
               << "\nTotal Cycles: " << TotalCycles
               << "\nTotal uOps: " << UOpCount << '\n'
               << "\nDispatch Width: " << DispatchWidth
               << "\nuOps Per Cycle: "
               << format("%.2f", RoundHalfUp(UOpsPerCycle, 100))
               << "\nIPC: " << format("%.2f", RoundHalfUp(InstrPerCycle, 100))
               << "\nBlock RThroughput: "
               << format("%.1f", RoundHalfUp(RThroughput, 10)) << '\n';
  ReportStream.flush();
  OS << Report;
}
|
|
} // namespace mca.
|