IR: merge general DCE with flag DCE

Flag DCE needs to do general DCE anyway to converge in one pass. So we can move
the special syscall/atomic logic over to flag DCE and then drop the second DCE
pass altogether. Now local dead code of both is eliminated in a single pass.

Flag DCE is carefully written to converge in a single iteration, which is what
makes this scheme work.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
Alyssa Rosenzweig 2024-05-19 17:44:25 -04:00
parent cf77f2ae5d
commit cb00d9171f
8 changed files with 65 additions and 127 deletions

View File

@ -137,7 +137,6 @@ set (SRCS
Interface/IR/IREmitter.cpp
Interface/IR/PassManager.cpp
Interface/IR/Passes/ConstProp.cpp
Interface/IR/Passes/DeadCodeElimination.cpp
Interface/IR/Passes/DeadContextStoreElimination.cpp
Interface/IR/Passes/IRDumperPass.cpp
Interface/IR/Passes/IRValidation.cpp

View File

@ -79,13 +79,9 @@ void PassManager::AddDefaultPasses(FEXCore::Context::ContextImpl* ctx, bool Inli
}
InsertPass(CreateDeadStoreElimination());
InsertPass(CreatePassDeadCodeElimination());
InsertPass(CreateConstProp(InlineConstants, ctx->HostFeatures.SupportsTSOImm9, Is64BitMode()));
InsertPass(CreateDeadFlagCalculationEliminination());
InsertPass(CreateInlineCallOptimization(&ctx->CPUID));
InsertPass(CreatePassDeadCodeElimination());
InsertPass(CreateDeadFlagCalculationEliminination());
}
}

View File

@ -21,7 +21,6 @@ fextl::unique_ptr<FEXCore::IR::Pass> CreateContextLoadStoreElimination(bool Supp
fextl::unique_ptr<FEXCore::IR::Pass> CreateInlineCallOptimization(const FEXCore::CPUIDEmu* CPUID);
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadFlagCalculationEliminination();
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadStoreElimination();
fextl::unique_ptr<FEXCore::IR::Pass> CreatePassDeadCodeElimination();
fextl::unique_ptr<FEXCore::IR::RegisterAllocationPass> CreateRegisterAllocationPass();
fextl::unique_ptr<FEXCore::IR::Pass> CreateLongDivideEliminationPass();

View File

@ -1,108 +0,0 @@
// SPDX-License-Identifier: MIT
/*
$info$
tags: ir|opts
$end_info$
*/
#include "Interface/IR/IREmitter.h"
#include "Interface/IR/PassManager.h"
#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/Profiler.h>
#include <memory>
namespace FEXCore::IR {
class DeadCodeElimination final : public FEXCore::IR::Pass {
void Run(IREmitter* IREmit) override;
private:
void markUsed(OrderedNodeWrapper* CodeOp, IROp_Header* IROp);
};
// Block-local dead code elimination.
//
// Every code block is walked from its last op back to its first. For each op:
//  - a syscall / inline syscall flagged NOSIDEEFFECTS is treated as removable,
//  - a fetching atomic whose result has no uses is rewritten in place to the
//    matching non-fetching atomic opcode,
//  - any removable op with zero uses is removed through IREmit.
void DeadCodeElimination::Run(IREmitter* IREmit) {
  FEXCORE_PROFILE_SCOPED("PassManager::DCE");

  auto CurrentIR = IREmit->ViewIR();

  for (auto [BlockNode, BlockHeader] : CurrentIR.GetBlocks()) {
    auto BlockIROp = BlockHeader->CW<FEXCore::IR::IROp_CodeBlock>();

    // Reverse iteration is not yet supported by the iterators, so a cursor is
    // stepped backwards by hand between the block's two end points.
    auto FirstOp = CurrentIR.at(BlockIROp->Begin);
    auto Cursor = CurrentIR.at(BlockIROp->Last);

    for (;; --Cursor) {
      auto [CodeNode, IROp] = Cursor();

      const bool Unused = CodeNode->GetUses() == 0;

      // Decided from the opcode as it is *before* any atomic rewrite below.
      bool Removable = !IR::HasSideEffects(IROp->Op);

      switch (IROp->Op) {
      case OP_SYSCALL:
      case OP_INLINESYSCALL: {
        FEXCore::IR::SyscallFlags Flags {};
        if (IROp->Op == OP_INLINESYSCALL) {
          Flags = IROp->C<IR::IROp_InlineSyscall>()->Flags;
        } else {
          Flags = IROp->C<IR::IROp_Syscall>()->Flags;
        }

        const auto NoSideEffects = FEXCore::IR::SyscallFlags::NOSIDEEFFECTS;
        if ((Flags & NoSideEffects) == NoSideEffects) {
          Removable = true;
        }
        break;
      }

      // An atomic fetch whose result is completely unused becomes the
      // equivalent non-fetching atomic operation.
      case OP_ATOMICFETCHADD:
        if (Unused) {
          IROp->Op = OP_ATOMICADD;
        }
        break;
      case OP_ATOMICFETCHSUB:
        if (Unused) {
          IROp->Op = OP_ATOMICSUB;
        }
        break;
      case OP_ATOMICFETCHAND:
        if (Unused) {
          IROp->Op = OP_ATOMICAND;
        }
        break;
      case OP_ATOMICFETCHCLR:
        if (Unused) {
          IROp->Op = OP_ATOMICCLR;
        }
        break;
      case OP_ATOMICFETCHOR:
        if (Unused) {
          IROp->Op = OP_ATOMICOR;
        }
        break;
      case OP_ATOMICFETCHXOR:
        if (Unused) {
          IROp->Op = OP_ATOMICXOR;
        }
        break;
      case OP_ATOMICFETCHNEG:
        if (Unused) {
          IROp->Op = OP_ATOMICNEG;
        }
        break;

      default: break;
      }

      // Use-count tracking cannot safely remove anything with side effects,
      // so only removable ops without uses are dropped.
      if (Removable && Unused) {
        IREmit->Remove(CodeNode);
      }

      // Stop once the first op of the block has been handled; the cursor is
      // only stepped back when there is something left in front of it.
      if (Cursor == FirstOp) {
        break;
      }
    }
  }
}
// Definition of the markUsed helper declared in DeadCodeElimination. The body
// is empty, so calling it has no effect.
// NOTE(review): nothing in the visible Run() calls this -- it looks like a
// leftover stub; confirm there are no other callers before relying on it.
void DeadCodeElimination::markUsed(OrderedNodeWrapper* CodeOp, IROp_Header* IROp) {}
// Factory for the dead code elimination pass. Returns a newly constructed
// DeadCodeElimination owned through a pointer to the generic Pass interface.
fextl::unique_ptr<FEXCore::IR::Pass> CreatePassDeadCodeElimination() {
  fextl::unique_ptr<FEXCore::IR::Pass> Result = fextl::make_unique<DeadCodeElimination>();
  return Result;
}
} // namespace FEXCore::IR

View File

@ -456,12 +456,9 @@ public:
explicit RCLSE(bool SupportsAVX_)
: SupportsAVX {SupportsAVX_} {
ClassifyContextStruct(&ClassifiedStruct, SupportsAVX);
DCE = FEXCore::IR::CreatePassDeadCodeElimination();
}
void Run(FEXCore::IR::IREmitter* IREmit) override;
private:
fextl::unique_ptr<FEXCore::IR::Pass> DCE;
ContextInfo ClassifiedStruct;
fextl::unordered_map<FEXCore::IR::NodeID, BlockInfo> OffsetToBlockMap;

View File

@ -59,6 +59,7 @@ private:
FlagInfo Classify(IROp_Header* Node);
unsigned FlagForReg(unsigned Reg);
unsigned FlagsForCondClassType(CondClassType Cond);
bool EliminateDeadCode(IREmitter* IREmit, OrderedNode* CodeNode, IROp_Header* IROp);
};
unsigned DeadFlagCalculationEliminination::FlagForReg(unsigned Reg) {
@ -310,8 +311,68 @@ FlagInfo DeadFlagCalculationEliminination::Classify(IROp_Header* IROp) {
return {.Trivial = true};
}
// General purpose dead code elimination for a single op.
//
// Returns true when the caller should skip flag handling for this op, either
// because the op was just removed or because it cannot possibly affect flags
// (every fetching atomic takes this path). Returns false otherwise.
//
//  - Syscalls / inline syscalls flagged NOSIDEEFFECTS are treated as removable.
//  - A fetching atomic whose result has no uses is rewritten in place to the
//    matching non-fetching atomic opcode.
//  - Any removable op with zero uses is removed through IREmit.
bool DeadFlagCalculationEliminination::EliminateDeadCode(IREmitter* IREmit, OrderedNode* CodeNode, IROp_Header* IROp) {
  const bool Unused = CodeNode->GetUses() == 0;

  // Decided from the opcode as it is *before* any atomic rewrite below.
  bool Removable = !IR::HasSideEffects(IROp->Op);

  switch (IROp->Op) {
  case OP_SYSCALL:
  case OP_INLINESYSCALL: {
    FEXCore::IR::SyscallFlags Flags {};
    if (IROp->Op == OP_INLINESYSCALL) {
      Flags = IROp->C<IR::IROp_InlineSyscall>()->Flags;
    } else {
      Flags = IROp->C<IR::IROp_Syscall>()->Flags;
    }

    const auto NoSideEffects = FEXCore::IR::SyscallFlags::NOSIDEEFFECTS;
    if ((Flags & NoSideEffects) == NoSideEffects) {
      Removable = true;
    }
    break;
  }

  // An atomic fetch whose result is completely unused becomes the equivalent
  // non-fetching atomic operation. Used or not, the op is reported as one
  // the caller should skip.
  case OP_ATOMICFETCHADD:
    if (Unused) {
      IROp->Op = OP_ATOMICADD;
    }
    return true;
  case OP_ATOMICFETCHSUB:
    if (Unused) {
      IROp->Op = OP_ATOMICSUB;
    }
    return true;
  case OP_ATOMICFETCHAND:
    if (Unused) {
      IROp->Op = OP_ATOMICAND;
    }
    return true;
  case OP_ATOMICFETCHCLR:
    if (Unused) {
      IROp->Op = OP_ATOMICCLR;
    }
    return true;
  case OP_ATOMICFETCHOR:
    if (Unused) {
      IROp->Op = OP_ATOMICOR;
    }
    return true;
  case OP_ATOMICFETCHXOR:
    if (Unused) {
      IROp->Op = OP_ATOMICXOR;
    }
    return true;
  case OP_ATOMICFETCHNEG:
    if (Unused) {
      IROp->Op = OP_ATOMICNEG;
    }
    return true;

  default: break;
  }

  // Use-count tracking cannot safely remove anything with side effects, so
  // only removable ops without uses are dropped.
  if (Removable && Unused) {
    IREmit->Remove(CodeNode);
    return true;
  }

  return false;
}
/**
* @brief This pass removes flag calculations that will otherwise be unused INSIDE of that block
* @brief This pass removes dead code locally.
*/
void DeadFlagCalculationEliminination::Run(IREmitter* IREmit) {
FEXCORE_PROFILE_SCOPED("PassManager::DFE");
@ -337,12 +398,7 @@ void DeadFlagCalculationEliminination::Run(IREmitter* IREmit) {
// Optimizing flags can cause earlier flag reads to become dead but dead
// flag reads should not impede optimization of earlier dead flag writes.
// We must DCE as we go to ensure we converge in a single iteration.
//
// TODO: This whole pass could be merged with DCE?
bool HasSideEffects = IR::HasSideEffects(IROp->Op);
if (!HasSideEffects && CodeNode->GetUses() == 0) {
IREmit->Remove(CodeNode);
} else {
if (!EliminateDeadCode(IREmit, CodeNode, IROp)) {
// Optimization algorithm: For each flag written...
//
// If the flag has a later read (per FlagsRead), remove the flag from

View File

@ -22,7 +22,7 @@ class RegisterAllocationData;
enum class SyscallFlags : uint8_t {
DEFAULT = 0,
// Syscall doesn't care about CPUState being serialized up to the syscall instruction.
// Means DeadCodeElimination can optimize through a syscall operation.
// Means dead code elimination can optimize through a syscall operation.
OPTIMIZETHROUGH = 1 << 0,
// Syscall only reads the passed in arguments. Doesn't read CPUState.
NOSYNCSTATEONENTRY = 1 << 1,

View File

@ -109,7 +109,6 @@ IR to IR Optimization
- [PassManager.cpp](../FEXCore/Source/Interface/IR/PassManager.cpp): Defines which passes are run, and runs them
- [PassManager.h](../FEXCore/Source/Interface/IR/PassManager.h)
- [ConstProp.cpp](../FEXCore/Source/Interface/IR/Passes/ConstProp.cpp): ConstProp, ZExt elim, addressgen coalesce, const pooling, fcmp reduction, const inlining
- [DeadCodeElimination.cpp](../FEXCore/Source/Interface/IR/Passes/DeadCodeElimination.cpp)
- [DeadContextStoreElimination.cpp](../FEXCore/Source/Interface/IR/Passes/DeadContextStoreElimination.cpp): Transforms ContextLoad/Store to temporaries, similar to mem2reg
- [DeadStoreElimination.cpp](../FEXCore/Source/Interface/IR/Passes/DeadStoreElimination.cpp): Cross block store-after-store elimination
- [IRValidation.cpp](../FEXCore/Source/Interface/IR/Passes/IRValidation.cpp): Sanity checking pass