mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-04-03 00:01:39 +00:00
[llvm-mca] Speed up the computation of the wait/ready/issued sets in the Scheduler.
This patch is a follow-up to r338702. We don't need to use a map to model the wait/ready/issued sets. It is much more efficient to use a vector instead. This patch gives us an average 7.5% speedup (on top of the ~12% speedup obtained after r338702). llvm-svn: 338883
This commit is contained in:
parent
a13157e779
commit
3d390604c7
@ -645,32 +645,32 @@ available units from the group; by default, the resource manager uses a
|
||||
round-robin selector to guarantee that resource usage is uniformly distributed
|
||||
between all units of a group.
|
||||
|
||||
:program:`llvm-mca`'s scheduler implements three instruction queues:
|
||||
:program:`llvm-mca`'s scheduler internally groups instructions into three sets:
|
||||
|
||||
* WaitQueue: a queue of instructions whose operands are not ready.
|
||||
* ReadyQueue: a queue of instructions ready to execute.
|
||||
* IssuedQueue: a queue of instructions executing.
|
||||
* WaitSet: a set of instructions whose operands are not ready.
|
||||
* ReadySet: a set of instructions ready to execute.
|
||||
* IssuedSet: a set of instructions executing.
|
||||
|
||||
Depending on the operand availability, instructions that are dispatched to the
|
||||
scheduler are either placed into the WaitQueue or into the ReadyQueue.
|
||||
Depending on the operands availability, instructions that are dispatched to the
|
||||
scheduler are either placed into the WaitSet or into the ReadySet.
|
||||
|
||||
Every cycle, the scheduler checks if instructions can be moved from the
|
||||
WaitQueue to the ReadyQueue, and if instructions from the ReadyQueue can be
|
||||
issued to the underlying pipelines. The algorithm prioritizes older instructions
|
||||
over younger instructions.
|
||||
Every cycle, the scheduler checks if instructions can be moved from the WaitSet
|
||||
to the ReadySet, and if instructions from the ReadySet can be issued to the
|
||||
underlying pipelines. The algorithm prioritizes older instructions over younger
|
||||
instructions.
|
||||
|
||||
Write-Back and Retire Stage
|
||||
"""""""""""""""""""""""""""
|
||||
Issued instructions are moved from the ReadyQueue to the IssuedQueue. There,
|
||||
Issued instructions are moved from the ReadySet to the IssuedSet. There,
|
||||
instructions wait until they reach the write-back stage. At that point, they
|
||||
get removed from the queue and the retire control unit is notified.
|
||||
|
||||
When instructions are executed, the retire control unit flags the
|
||||
instruction as "ready to retire."
|
||||
When instructions are executed, the retire control unit flags the instruction as
|
||||
"ready to retire."
|
||||
|
||||
Instructions are retired in program order. The register file is notified of
|
||||
the retirement so that it can free the physical registers that were allocated
|
||||
for the instruction during the register renaming stage.
|
||||
Instructions are retired in program order. The register file is notified of the
|
||||
retirement so that it can free the physical registers that were allocated for
|
||||
the instruction during the register renaming stage.
|
||||
|
||||
Load/Store Unit and Memory Consistency Model
|
||||
""""""""""""""""""""""""""""""""""""""""""""
|
||||
|
@ -37,7 +37,7 @@ void ExecuteStage::reclaimSchedulerResources() {
|
||||
// Update the scheduler's instruction queues.
|
||||
void ExecuteStage::updateSchedulerQueues() {
|
||||
SmallVector<InstRef, 4> InstructionIDs;
|
||||
HWS.updateIssuedQueue(InstructionIDs);
|
||||
HWS.updateIssuedSet(InstructionIDs);
|
||||
for (const InstRef &IR : InstructionIDs)
|
||||
notifyInstructionExecuted(IR);
|
||||
InstructionIDs.clear();
|
||||
@ -65,9 +65,9 @@ void ExecuteStage::issueReadyInstructions() {
|
||||
// Instructions that have been issued during this cycle might have unblocked
|
||||
// other dependent instructions. Dependent instructions may be issued during
|
||||
// this same cycle if operands have ReadAdvance entries. Promote those
|
||||
// instructions to the ReadyQueue and tell to the caller that we need
|
||||
// instructions to the ReadySet and tell to the caller that we need
|
||||
// another round of 'issue()'.
|
||||
HWS.promoteToReadyQueue(InstructionIDs);
|
||||
HWS.promoteToReadySet(InstructionIDs);
|
||||
for (const InstRef &I : InstructionIDs)
|
||||
notifyInstructionReady(I);
|
||||
InstructionIDs.clear();
|
||||
|
@ -384,9 +384,12 @@ public:
|
||||
Instruction *getInstruction() { return second; }
|
||||
const Instruction *getInstruction() const { return second; }
|
||||
|
||||
/// Returns true if this InstRef has been populated.
|
||||
/// Returns true if this references a valid instruction.
|
||||
bool isValid() const { return second != nullptr; }
|
||||
|
||||
/// Invalidate this reference.
|
||||
void invalidate() { second = nullptr; }
|
||||
|
||||
#ifndef NDEBUG
|
||||
void print(llvm::raw_ostream &OS) const { OS << getSourceIndex(); }
|
||||
#endif
|
||||
|
@ -250,9 +250,9 @@ void ResourceManager::releaseResource(uint64_t ResourceID) {
|
||||
|
||||
#ifndef NDEBUG
|
||||
void Scheduler::dump() const {
|
||||
dbgs() << "[SCHEDULER]: WaitQueue size is: " << WaitQueue.size() << '\n';
|
||||
dbgs() << "[SCHEDULER]: ReadyQueue size is: " << ReadyQueue.size() << '\n';
|
||||
dbgs() << "[SCHEDULER]: IssuedQueue size is: " << IssuedQueue.size() << '\n';
|
||||
dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n';
|
||||
dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n';
|
||||
dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n';
|
||||
Resources->dump();
|
||||
}
|
||||
#endif
|
||||
@ -296,7 +296,7 @@ void Scheduler::issueInstructionImpl(
|
||||
IS->execute();
|
||||
|
||||
if (IS->isExecuting())
|
||||
IssuedQueue[IR.getSourceIndex()] = IS;
|
||||
IssuedSet.emplace_back(IR);
|
||||
}
|
||||
|
||||
// Release the buffered resources and issue the instruction.
|
||||
@ -308,92 +308,109 @@ void Scheduler::issueInstruction(
|
||||
issueInstructionImpl(IR, UsedResources);
|
||||
}
|
||||
|
||||
void Scheduler::promoteToReadyQueue(SmallVectorImpl<InstRef> &Ready) {
|
||||
void Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
|
||||
// Scan the set of waiting instructions and promote them to the
|
||||
// ready queue if operands are all ready.
|
||||
for (auto I = WaitQueue.begin(), E = WaitQueue.end(); I != E;) {
|
||||
const unsigned IID = I->first;
|
||||
Instruction *IS = I->second;
|
||||
unsigned RemovedElements = 0;
|
||||
for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E; ) {
|
||||
InstRef &IR = *I;
|
||||
if (!IR.isValid())
|
||||
break;
|
||||
|
||||
// Check if this instruction is now ready. In case, force
|
||||
// a transition in state using method 'update()'.
|
||||
if (!IS->isReady())
|
||||
IS->update();
|
||||
Instruction &IS = *IR.getInstruction();
|
||||
if (!IS.isReady())
|
||||
IS.update();
|
||||
|
||||
const InstrDesc &Desc = IS->getDesc();
|
||||
const InstrDesc &Desc = IS.getDesc();
|
||||
bool IsMemOp = Desc.MayLoad || Desc.MayStore;
|
||||
if (!IS->isReady() || (IsMemOp && !LSU->isReady({IID, IS}))) {
|
||||
if (!IS.isReady() || (IsMemOp && !LSU->isReady(IR))) {
|
||||
++I;
|
||||
continue;
|
||||
}
|
||||
|
||||
Ready.emplace_back(IID, IS);
|
||||
ReadyQueue[IID] = IS;
|
||||
auto ToRemove = I;
|
||||
++I;
|
||||
WaitQueue.erase(ToRemove);
|
||||
Ready.emplace_back(IR);
|
||||
ReadySet.emplace_back(IR);
|
||||
|
||||
IR.invalidate();
|
||||
++RemovedElements;
|
||||
std::iter_swap(I, E - RemovedElements);
|
||||
}
|
||||
|
||||
WaitSet.resize(WaitSet.size() - RemovedElements);
|
||||
}
|
||||
|
||||
InstRef Scheduler::select() {
|
||||
// Find the oldest ready-to-issue instruction in the ReadyQueue.
|
||||
auto It = std::find_if(ReadyQueue.begin(), ReadyQueue.end(),
|
||||
[&](const QueueEntryTy &Entry) {
|
||||
const InstrDesc &D = Entry.second->getDesc();
|
||||
return Resources->canBeIssued(D);
|
||||
});
|
||||
unsigned QueueIndex = ReadySet.size();
|
||||
int Rank = std::numeric_limits<int>::max();
|
||||
|
||||
if (It == ReadyQueue.end())
|
||||
return {0, nullptr};
|
||||
for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) {
|
||||
const InstRef &IR = ReadySet[I];
|
||||
const unsigned IID = IR.getSourceIndex();
|
||||
const Instruction &IS = *IR.getInstruction();
|
||||
|
||||
// We want to prioritize older instructions over younger instructions to
|
||||
// minimize the pressure on the reorder buffer. We also want to
|
||||
// rank higher the instructions with more users to better expose ILP.
|
||||
// Compute a rank value based on the age of an instruction (i.e. its source
|
||||
// index) and its number of users. The lower the rank value, the better.
|
||||
int CurrentRank = IID - IS.getNumUsers();
|
||||
|
||||
// Compute a rank value based on the age of an instruction (i.e. its source
|
||||
// index) and its number of users. The lower the rank value, the better.
|
||||
int Rank = It->first - It->second->getNumUsers();
|
||||
for (auto I = It, E = ReadyQueue.end(); I != E; ++I) {
|
||||
int CurrentRank = I->first - I->second->getNumUsers();
|
||||
if (CurrentRank < Rank) {
|
||||
const InstrDesc &D = I->second->getDesc();
|
||||
// We want to prioritize older instructions over younger instructions to
|
||||
// minimize the pressure on the reorder buffer. We also want to
|
||||
// rank higher the instructions with more users to better expose ILP.
|
||||
if (CurrentRank == Rank)
|
||||
if (IID > ReadySet[QueueIndex].getSourceIndex())
|
||||
continue;
|
||||
|
||||
if (CurrentRank <= Rank) {
|
||||
const InstrDesc &D = IS.getDesc();
|
||||
if (Resources->canBeIssued(D)) {
|
||||
Rank = CurrentRank;
|
||||
It = I;
|
||||
QueueIndex = I;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (QueueIndex == ReadySet.size())
|
||||
return InstRef();
|
||||
|
||||
// We found an instruction to issue.
|
||||
InstRef IR(It->first, It->second);
|
||||
ReadyQueue.erase(It);
|
||||
|
||||
InstRef IR = ReadySet[QueueIndex];
|
||||
std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]);
|
||||
ReadySet.pop_back();
|
||||
return IR;
|
||||
}
|
||||
|
||||
void Scheduler::updatePendingQueue(SmallVectorImpl<InstRef> &Ready) {
|
||||
// Notify to instructions in the pending queue that a new cycle just
|
||||
// started.
|
||||
for (QueueEntryTy Entry : WaitQueue)
|
||||
Entry.second->cycleEvent();
|
||||
promoteToReadyQueue(Ready);
|
||||
for (InstRef &Entry : WaitSet)
|
||||
Entry.getInstruction()->cycleEvent();
|
||||
promoteToReadySet(Ready);
|
||||
}
|
||||
|
||||
void Scheduler::updateIssuedQueue(SmallVectorImpl<InstRef> &Executed) {
|
||||
for (auto I = IssuedQueue.begin(), E = IssuedQueue.end(); I != E;) {
|
||||
const QueueEntryTy Entry = *I;
|
||||
Instruction *IS = Entry.second;
|
||||
IS->cycleEvent();
|
||||
if (IS->isExecuted()) {
|
||||
Executed.push_back({Entry.first, Entry.second});
|
||||
auto ToRemove = I;
|
||||
++I;
|
||||
IssuedQueue.erase(ToRemove);
|
||||
} else {
|
||||
LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << Entry.first
|
||||
void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) {
|
||||
unsigned RemovedElements = 0;
|
||||
for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E; ) {
|
||||
InstRef &IR = *I;
|
||||
if (!IR.isValid())
|
||||
break;
|
||||
Instruction &IS = *IR.getInstruction();
|
||||
IS.cycleEvent();
|
||||
if (!IS.isExecuted()) {
|
||||
LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR
|
||||
<< " is still executing.\n");
|
||||
++I;
|
||||
continue;
|
||||
}
|
||||
|
||||
Executed.emplace_back(IR);
|
||||
++RemovedElements;
|
||||
IR.invalidate();
|
||||
std::iter_swap(I, E - RemovedElements);
|
||||
}
|
||||
|
||||
IssuedSet.resize(IssuedSet.size() - RemovedElements);
|
||||
}
|
||||
|
||||
void Scheduler::onInstructionExecuted(const InstRef &IR) {
|
||||
@ -408,9 +425,8 @@ bool Scheduler::reserveResources(InstRef &IR) {
|
||||
// If necessary, reserve queue entries in the load-store unit (LSU).
|
||||
const bool Reserved = LSU->reserve(IR);
|
||||
if (!IR.getInstruction()->isReady() || (Reserved && !LSU->isReady(IR))) {
|
||||
LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR
|
||||
<< " to the Wait Queue\n");
|
||||
WaitQueue[IR.getSourceIndex()] = IR.getInstruction();
|
||||
LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
|
||||
WaitSet.push_back(IR);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -419,9 +435,8 @@ bool Scheduler::reserveResources(InstRef &IR) {
|
||||
bool Scheduler::issueImmediately(InstRef &IR) {
|
||||
const InstrDesc &Desc = IR.getInstruction()->getDesc();
|
||||
if (!Desc.isZeroLatency() && !Resources->mustIssueImmediately(Desc)) {
|
||||
LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR
|
||||
<< " to the Ready Queue\n");
|
||||
ReadyQueue[IR.getSourceIndex()] = IR.getInstruction();
|
||||
LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n");
|
||||
ReadySet.push_back(IR);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -342,30 +342,28 @@ public:
|
||||
}; // namespace mca
|
||||
|
||||
/// Class Scheduler is responsible for issuing instructions to pipeline
|
||||
/// resources. Internally, it delegates to a ResourceManager the management of
|
||||
/// processor resources.
|
||||
/// This class is also responsible for tracking the progress of instructions
|
||||
/// from the dispatch stage, until the write-back stage.
|
||||
/// resources.
|
||||
///
|
||||
/// An nstruction dispatched to the Scheduler is initially placed into either
|
||||
/// the 'WaitQueue' or the 'ReadyQueue' depending on the availability of the
|
||||
/// input operands. Instructions in the WaitQueue are ordered by instruction
|
||||
/// index. An instruction is moved from the WaitQueue to the ReadyQueue when
|
||||
/// register operands become available, and all memory dependencies are met.
|
||||
/// Instructions that are moved from the WaitQueue to the ReadyQueue transition
|
||||
/// from state 'IS_AVAILABLE' to state 'IS_READY'.
|
||||
/// Internally, it delegates to a ResourceManager the management of processor
|
||||
/// resources. This class is also responsible for tracking the progress of
|
||||
/// instructions from the dispatch stage, until the write-back stage.
|
||||
///
|
||||
/// At the beginning of each cycle, the Scheduler checks if there are
|
||||
/// instructions in the WaitQueue that can be moved to the ReadyQueue. If the
|
||||
/// ReadyQueue is not empty, then older instructions from the queue are issued
|
||||
/// to the processor pipelines, and the underlying ResourceManager is updated
|
||||
/// accordingly. The ReadyQueue is ordered by instruction index to guarantee
|
||||
/// that the first instructions in the set are also the oldest.
|
||||
/// An instruction dispatched to the Scheduler is initially placed into either
|
||||
/// the 'WaitSet' or the 'ReadySet' depending on the availability of the input
|
||||
/// operands.
|
||||
///
|
||||
/// An Instruction is moved from the ReadyQueue the `IssuedQueue` when it is
|
||||
/// issued to a (one or more) pipeline(s). This event also causes an instruction
|
||||
/// state transition (i.e. from state IS_READY, to state IS_EXECUTING).
|
||||
/// An Instruction leaves the IssuedQueue when it reaches the write-back stage.
|
||||
/// An instruction is moved from the WaitSet to the ReadySet when register
|
||||
/// operands become available, and all memory dependencies are met.
|
||||
/// Instructions that are moved from the WaitSet to the ReadySet transition
|
||||
/// in state from 'IS_AVAILABLE' to 'IS_READY'.
|
||||
///
|
||||
/// On every cycle, the Scheduler checks if it can promote instructions from the
|
||||
/// WaitSet to the ReadySet.
|
||||
///
|
||||
/// An Instruction is moved from the ReadySet the `IssuedSet` when it is issued
|
||||
/// to a (one or more) pipeline(s). This event also causes an instruction state
|
||||
/// transition (i.e. from state IS_READY, to state IS_EXECUTING). An Instruction
|
||||
/// leaves the IssuedSet when it reaches the write-back stage.
|
||||
class Scheduler : public HardwareUnit {
|
||||
const llvm::MCSchedModel &SM;
|
||||
|
||||
@ -373,10 +371,9 @@ class Scheduler : public HardwareUnit {
|
||||
std::unique_ptr<ResourceManager> Resources;
|
||||
std::unique_ptr<LSUnit> LSU;
|
||||
|
||||
using QueueEntryTy = std::pair<unsigned, Instruction *>;
|
||||
std::map<unsigned, Instruction *> WaitQueue;
|
||||
std::map<unsigned, Instruction *> ReadyQueue;
|
||||
std::map<unsigned, Instruction *> IssuedQueue;
|
||||
std::vector<InstRef> WaitSet;
|
||||
std::vector<InstRef> ReadySet;
|
||||
std::vector<InstRef> IssuedSet;
|
||||
|
||||
/// Issue an instruction without updating the ready queue.
|
||||
void issueInstructionImpl(
|
||||
@ -412,7 +409,7 @@ public:
|
||||
/// zero-latency instructions).
|
||||
///
|
||||
/// Returns true if the instruction is issued immediately. If this does not
|
||||
/// occur, then the instruction will be added to the Scheduler's ReadyQueue.
|
||||
/// occur, then the instruction will be added to the Scheduler's ReadySet.
|
||||
bool issueImmediately(InstRef &IR);
|
||||
|
||||
/// Reserve one entry in each buffered resource.
|
||||
@ -431,15 +428,15 @@ public:
|
||||
/// once they have been fully consumed.
|
||||
void reclaimSimulatedResources(llvm::SmallVectorImpl<ResourceRef> &Freed);
|
||||
|
||||
/// Move instructions from the WaitQueue to the ReadyQueue if input operands
|
||||
/// Move instructions from the WaitSet to the ReadySet if input operands
|
||||
/// are all available.
|
||||
void promoteToReadyQueue(llvm::SmallVectorImpl<InstRef> &Ready);
|
||||
void promoteToReadySet(llvm::SmallVectorImpl<InstRef> &Ready);
|
||||
|
||||
/// Update the ready queue.
|
||||
void updatePendingQueue(llvm::SmallVectorImpl<InstRef> &Ready);
|
||||
|
||||
/// Update the issued queue.
|
||||
void updateIssuedQueue(llvm::SmallVectorImpl<InstRef> &Executed);
|
||||
void updateIssuedSet(llvm::SmallVectorImpl<InstRef> &Executed);
|
||||
|
||||
/// Updates the Scheduler's resources to reflect that an instruction has just
|
||||
/// been executed.
|
||||
@ -456,7 +453,7 @@ public:
|
||||
/// execute the given instruction immediately.
|
||||
bool reserveResources(InstRef &IR);
|
||||
|
||||
/// Select the next instruction to issue from the ReadyQueue.
|
||||
/// Select the next instruction to issue from the ReadySet.
|
||||
/// This method gives priority to older instructions.
|
||||
InstRef select();
|
||||
|
||||
@ -467,10 +464,9 @@ public:
|
||||
// This routine performs a sanity check. This routine should only be called
|
||||
// when we know that 'IR' is not in the scheduler's instruction queues.
|
||||
void sanityCheck(const InstRef &IR) const {
|
||||
const unsigned Idx = IR.getSourceIndex();
|
||||
assert(WaitQueue.find(Idx) == WaitQueue.end());
|
||||
assert(ReadyQueue.find(Idx) == ReadyQueue.end());
|
||||
assert(IssuedQueue.find(Idx) == IssuedQueue.end());
|
||||
assert(llvm::find(WaitSet, IR) == WaitSet.end());
|
||||
assert(llvm::find(ReadySet, IR) == ReadySet.end());
|
||||
assert(llvm::find(IssuedSet, IR) == IssuedSet.end());
|
||||
}
|
||||
#endif // !NDEBUG
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user