mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-03-01 11:18:42 +00:00
IR: merge general DCE with flag DCE
Flag DCE needs to do general DCE anyway to converge in one pass. So we can move the special syscall/atomic logic over to flag DCE and then drop the second DCE pass altogether. Now local dead code of both is eliminated in a single pass. Flag DCE is carefully written to converge in a single iteration which makes this scheme work. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
parent
cf77f2ae5d
commit
cb00d9171f
@ -137,7 +137,6 @@ set (SRCS
|
||||
Interface/IR/IREmitter.cpp
|
||||
Interface/IR/PassManager.cpp
|
||||
Interface/IR/Passes/ConstProp.cpp
|
||||
Interface/IR/Passes/DeadCodeElimination.cpp
|
||||
Interface/IR/Passes/DeadContextStoreElimination.cpp
|
||||
Interface/IR/Passes/IRDumperPass.cpp
|
||||
Interface/IR/Passes/IRValidation.cpp
|
||||
|
@ -79,13 +79,9 @@ void PassManager::AddDefaultPasses(FEXCore::Context::ContextImpl* ctx, bool Inli
|
||||
}
|
||||
|
||||
InsertPass(CreateDeadStoreElimination());
|
||||
InsertPass(CreatePassDeadCodeElimination());
|
||||
InsertPass(CreateConstProp(InlineConstants, ctx->HostFeatures.SupportsTSOImm9, Is64BitMode()));
|
||||
|
||||
InsertPass(CreateDeadFlagCalculationEliminination());
|
||||
|
||||
InsertPass(CreateInlineCallOptimization(&ctx->CPUID));
|
||||
InsertPass(CreatePassDeadCodeElimination());
|
||||
InsertPass(CreateDeadFlagCalculationEliminination());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -21,7 +21,6 @@ fextl::unique_ptr<FEXCore::IR::Pass> CreateContextLoadStoreElimination(bool Supp
|
||||
fextl::unique_ptr<FEXCore::IR::Pass> CreateInlineCallOptimization(const FEXCore::CPUIDEmu* CPUID);
|
||||
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadFlagCalculationEliminination();
|
||||
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadStoreElimination();
|
||||
fextl::unique_ptr<FEXCore::IR::Pass> CreatePassDeadCodeElimination();
|
||||
fextl::unique_ptr<FEXCore::IR::RegisterAllocationPass> CreateRegisterAllocationPass();
|
||||
fextl::unique_ptr<FEXCore::IR::Pass> CreateLongDivideEliminationPass();
|
||||
|
||||
|
@ -1,108 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
$info$
|
||||
tags: ir|opts
|
||||
$end_info$
|
||||
*/
|
||||
|
||||
#include "Interface/IR/IREmitter.h"
|
||||
#include "Interface/IR/PassManager.h"
|
||||
|
||||
#include <FEXCore/IR/IR.h>
|
||||
#include <FEXCore/Utils/Profiler.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace FEXCore::IR {
|
||||
|
||||
// IR pass implementing local dead code elimination. Run() walks every code
// block from its last op back to its first, removes ops that have no uses and
// no side effects, and rewrites fetching atomics whose result is unused into
// their non-fetching equivalents.
class DeadCodeElimination final : public FEXCore::IR::Pass {
|
||||
// Pass entry point. Declared before any access specifier, so it has the
// class-default (private) access here.
void Run(IREmitter* IREmit) override;
|
||||
|
||||
private:
|
||||
// Defined with an empty body in this file; Run() does not call it.
void markUsed(OrderedNodeWrapper* CodeOp, IROp_Header* IROp);
|
||||
};
|
||||
|
||||
// Local dead code elimination over the whole IR.
//
// Each code block is walked from its last op back to its first. For every op:
//   - syscalls carrying SyscallFlags::NOSIDEEFFECTS are treated as having no
//     side effects;
//   - fetching atomics whose result has no uses are rewritten in place to the
//     matching non-fetching atomic op;
//   - ops without side effects and without uses are removed.
void DeadCodeElimination::Run(IREmitter* IREmit) {
  FEXCORE_PROFILE_SCOPED("PassManager::DCE");

  // True when every NOSIDEEFFECTS bit is set in the given syscall flags.
  const auto IsMarkedPure = [](FEXCore::IR::SyscallFlags Flags) {
    return (Flags & FEXCore::IR::SyscallFlags::NOSIDEEFFECTS) == FEXCore::IR::SyscallFlags::NOSIDEEFFECTS;
  };

  auto IRView = IREmit->ViewIR();

  for (auto [BlockNode, BlockHeader] : IRView.GetBlocks()) {
    auto Block = BlockHeader->CW<FEXCore::IR::IROp_CodeBlock>();

    // Reverse iteration is not available through the iterators, so a cursor
    // starting at the block's last op is stepped backwards by hand until the
    // block's first op has been processed.
    auto First = IRView.at(Block->Begin);
    auto Cursor = IRView.at(Block->Last);

    for (;; --Cursor) {
      auto [Node, Header] = Cursor();

      // Side-effect classification is taken from the op as it was before any
      // atomic rewrite below.
      bool SideEffects = IR::HasSideEffects(Header->Op);
      const bool Unused = Node->GetUses() == 0;

      switch (Header->Op) {
      case OP_SYSCALL:
        if (IsMarkedPure(Header->C<IR::IROp_Syscall>()->Flags)) {
          SideEffects = false;
        }
        break;
      case OP_INLINESYSCALL:
        if (IsMarkedPure(Header->C<IR::IROp_InlineSyscall>()->Flags)) {
          SideEffects = false;
        }
        break;

      // An atomic fetch whose result is completely unused becomes the
      // equivalent non-fetching atomic operation.
      case OP_ATOMICFETCHADD:
        if (Unused) {
          Header->Op = OP_ATOMICADD;
        }
        break;
      case OP_ATOMICFETCHSUB:
        if (Unused) {
          Header->Op = OP_ATOMICSUB;
        }
        break;
      case OP_ATOMICFETCHAND:
        if (Unused) {
          Header->Op = OP_ATOMICAND;
        }
        break;
      case OP_ATOMICFETCHCLR:
        if (Unused) {
          Header->Op = OP_ATOMICCLR;
        }
        break;
      case OP_ATOMICFETCHOR:
        if (Unused) {
          Header->Op = OP_ATOMICOR;
        }
        break;
      case OP_ATOMICFETCHXOR:
        if (Unused) {
          Header->Op = OP_ATOMICXOR;
        }
        break;
      case OP_ATOMICFETCHNEG:
        if (Unused) {
          Header->Op = OP_ATOMICNEG;
        }
        break;

      default: break;
      }

      // Use-count tracking can only safely remove ops without side effects.
      if (!SideEffects && Unused) {
        IREmit->Remove(Node);
      }

      // Stop once the first op of the block has been handled; the cursor is
      // only decremented when another op remains.
      if (Cursor == First) {
        break;
      }
    }
  }
}
|
||||
|
||||
// Currently a no-op: the body is empty and neither argument is read.
void DeadCodeElimination::markUsed(OrderedNodeWrapper* CodeOp, IROp_Header* IROp) {
}
|
||||
|
||||
// Factory: allocates a DeadCodeElimination pass and hands it back through the
// generic Pass interface.
fextl::unique_ptr<FEXCore::IR::Pass> CreatePassDeadCodeElimination() {
  fextl::unique_ptr<FEXCore::IR::Pass> NewPass = fextl::make_unique<DeadCodeElimination>();
  return NewPass;
}
|
||||
|
||||
} // namespace FEXCore::IR
|
@ -456,12 +456,9 @@ public:
|
||||
explicit RCLSE(bool SupportsAVX_)
|
||||
: SupportsAVX {SupportsAVX_} {
|
||||
ClassifyContextStruct(&ClassifiedStruct, SupportsAVX);
|
||||
DCE = FEXCore::IR::CreatePassDeadCodeElimination();
|
||||
}
|
||||
void Run(FEXCore::IR::IREmitter* IREmit) override;
|
||||
private:
|
||||
fextl::unique_ptr<FEXCore::IR::Pass> DCE;
|
||||
|
||||
ContextInfo ClassifiedStruct;
|
||||
fextl::unordered_map<FEXCore::IR::NodeID, BlockInfo> OffsetToBlockMap;
|
||||
|
||||
|
@ -59,6 +59,7 @@ private:
|
||||
FlagInfo Classify(IROp_Header* Node);
|
||||
unsigned FlagForReg(unsigned Reg);
|
||||
unsigned FlagsForCondClassType(CondClassType Cond);
|
||||
bool EliminateDeadCode(IREmitter* IREmit, OrderedNode* CodeNode, IROp_Header* IROp);
|
||||
};
|
||||
|
||||
unsigned DeadFlagCalculationEliminination::FlagForReg(unsigned Reg) {
|
||||
@ -310,8 +311,68 @@ FlagInfo DeadFlagCalculationEliminination::Classify(IROp_Header* IROp) {
|
||||
return {.Trivial = true};
|
||||
}
|
||||
|
||||
// General purpose dead code elimination. Returns whether flag handling should
|
||||
// be skipped (because it was removed or could not possibly affect flags).
|
||||
bool DeadFlagCalculationEliminination::EliminateDeadCode(IREmitter* IREmit, OrderedNode* CodeNode, IROp_Header* IROp) {
|
||||
bool HasSideEffects = IR::HasSideEffects(IROp->Op);
|
||||
|
||||
switch (IROp->Op) {
|
||||
case OP_SYSCALL:
|
||||
case OP_INLINESYSCALL: {
|
||||
FEXCore::IR::SyscallFlags Flags {};
|
||||
if (IROp->Op == OP_SYSCALL) {
|
||||
auto Op = IROp->C<IR::IROp_Syscall>();
|
||||
Flags = Op->Flags;
|
||||
} else {
|
||||
auto Op = IROp->C<IR::IROp_InlineSyscall>();
|
||||
Flags = Op->Flags;
|
||||
}
|
||||
|
||||
if ((Flags & FEXCore::IR::SyscallFlags::NOSIDEEFFECTS) == FEXCore::IR::SyscallFlags::NOSIDEEFFECTS) {
|
||||
HasSideEffects = false;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case OP_ATOMICFETCHADD:
|
||||
case OP_ATOMICFETCHSUB:
|
||||
case OP_ATOMICFETCHAND:
|
||||
case OP_ATOMICFETCHCLR:
|
||||
case OP_ATOMICFETCHOR:
|
||||
case OP_ATOMICFETCHXOR:
|
||||
case OP_ATOMICFETCHNEG: {
|
||||
// If the result of the atomic fetch is completely unused, convert it to a non-fetching atomic operation.
|
||||
if (CodeNode->GetUses() == 0) {
|
||||
switch (IROp->Op) {
|
||||
case OP_ATOMICFETCHADD: IROp->Op = OP_ATOMICADD; break;
|
||||
case OP_ATOMICFETCHSUB: IROp->Op = OP_ATOMICSUB; break;
|
||||
case OP_ATOMICFETCHAND: IROp->Op = OP_ATOMICAND; break;
|
||||
case OP_ATOMICFETCHCLR: IROp->Op = OP_ATOMICCLR; break;
|
||||
case OP_ATOMICFETCHOR: IROp->Op = OP_ATOMICOR; break;
|
||||
case OP_ATOMICFETCHXOR: IROp->Op = OP_ATOMICXOR; break;
|
||||
case OP_ATOMICFETCHNEG: IROp->Op = OP_ATOMICNEG; break;
|
||||
default: FEX_UNREACHABLE;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
|
||||
// Skip over anything that has side effects
|
||||
// Use count tracking can't safely remove anything with side effects
|
||||
if (!HasSideEffects) {
|
||||
if (CodeNode->GetUses() == 0) {
|
||||
IREmit->Remove(CodeNode);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief This pass removes flag calculations that will otherwise be unused INSIDE of that block
|
||||
* @brief This pass removes dead code locally.
|
||||
*/
|
||||
void DeadFlagCalculationEliminination::Run(IREmitter* IREmit) {
|
||||
FEXCORE_PROFILE_SCOPED("PassManager::DFE");
|
||||
@ -337,12 +398,7 @@ void DeadFlagCalculationEliminination::Run(IREmitter* IREmit) {
|
||||
// Optimizing flags can cause earlier flag reads to become dead but dead
|
||||
// flag reads should not impede optimization of earlier dead flag writes.
|
||||
// We must DCE as we go to ensure we converge in a single iteration.
|
||||
//
|
||||
// TODO: This whole pass could be merged with DCE?
|
||||
bool HasSideEffects = IR::HasSideEffects(IROp->Op);
|
||||
if (!HasSideEffects && CodeNode->GetUses() == 0) {
|
||||
IREmit->Remove(CodeNode);
|
||||
} else {
|
||||
if (!EliminateDeadCode(IREmit, CodeNode, IROp)) {
|
||||
// Optimization algorithm: For each flag written...
|
||||
//
|
||||
// If the flag has a later read (per FlagsRead), remove the flag from
|
||||
|
@ -22,7 +22,7 @@ class RegisterAllocationData;
|
||||
enum class SyscallFlags : uint8_t {
|
||||
DEFAULT = 0,
|
||||
// Syscall doesn't care about CPUState being serialized up to the syscall instruction.
|
||||
// Means DeadCodeElimination can optimize through a syscall operation.
|
||||
// Means dead code elimination can optimize through a syscall operation.
|
||||
OPTIMIZETHROUGH = 1 << 0,
|
||||
// Syscall only reads the passed in arguments. Doesn't read CPUState.
|
||||
NOSYNCSTATEONENTRY = 1 << 1,
|
||||
|
@ -109,7 +109,6 @@ IR to IR Optimization
|
||||
- [PassManager.cpp](../FEXCore/Source/Interface/IR/PassManager.cpp): Defines which passes are run, and runs them
|
||||
- [PassManager.h](../FEXCore/Source/Interface/IR/PassManager.h)
|
||||
- [ConstProp.cpp](../FEXCore/Source/Interface/IR/Passes/ConstProp.cpp): ConstProp, ZExt elim, addressgen coalesce, const pooling, fcmp reduction, const inlining
|
||||
- [DeadCodeElimination.cpp](../FEXCore/Source/Interface/IR/Passes/DeadCodeElimination.cpp)
|
||||
- [DeadContextStoreElimination.cpp](../FEXCore/Source/Interface/IR/Passes/DeadContextStoreElimination.cpp): Transforms ContextLoad/Store to temporaries, similar to mem2reg
|
||||
- [DeadStoreElimination.cpp](../FEXCore/Source/Interface/IR/Passes/DeadStoreElimination.cpp): Cross block store-after-store elimination
|
||||
- [IRValidation.cpp](../FEXCore/Source/Interface/IR/Passes/IRValidation.cpp): Sanity checking pass
|
||||
|
Loading…
x
Reference in New Issue
Block a user