ConstProp: swallow up InlineCallOptimization

No reason to have a separate pass for this, merging should be a bit faster since
it eliminates an IR walk.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
Alyssa Rosenzweig 2024-06-03 13:52:19 -04:00
parent 7d5cee4384
commit 6b9293979c
7 changed files with 94 additions and 132 deletions

View File

@ -145,7 +145,6 @@ set (SRCS
Interface/IR/Passes/RedundantFlagCalculationElimination.cpp
Interface/IR/Passes/DeadStoreElimination.cpp
Interface/IR/Passes/RegisterAllocationPass.cpp
Interface/IR/Passes/InlineCallOptimization.cpp
Utils/Telemetry.cpp
Utils/Threads.cpp
Utils/Profiler.cpp

View File

@ -79,8 +79,7 @@ void PassManager::AddDefaultPasses(FEXCore::Context::ContextImpl* ctx, bool Inli
}
InsertPass(CreateDeadStoreElimination());
InsertPass(CreateConstProp(InlineConstants, ctx->HostFeatures.SupportsTSOImm9));
InsertPass(CreateInlineCallOptimization(&ctx->CPUID));
InsertPass(CreateConstProp(InlineConstants, ctx->HostFeatures.SupportsTSOImm9, &ctx->CPUID));
InsertPass(CreateDeadFlagCalculationEliminination());
}
}

View File

@ -43,7 +43,7 @@ protected:
};
class PassManager final {
friend class InlineCallOptimization;
friend class ConstProp;
public:
void AddDefaultPasses(FEXCore::Context::ContextImpl* ctx, bool InlineConstants);
void AddDefaultValidationPasses();

View File

@ -16,9 +16,8 @@ class Pass;
class RegisterAllocationPass;
class RegisterAllocationData;
fextl::unique_ptr<FEXCore::IR::Pass> CreateConstProp(bool InlineConstants, bool SupportsTSOImm9);
fextl::unique_ptr<FEXCore::IR::Pass> CreateConstProp(bool InlineConstants, bool SupportsTSOImm9, const FEXCore::CPUIDEmu* CPUID);
fextl::unique_ptr<FEXCore::IR::Pass> CreateContextLoadStoreElimination(bool SupportsAVX);
fextl::unique_ptr<FEXCore::IR::Pass> CreateInlineCallOptimization(const FEXCore::CPUIDEmu* CPUID);
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadFlagCalculationEliminination();
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadStoreElimination();
fextl::unique_ptr<FEXCore::IR::RegisterAllocationPass> CreateRegisterAllocationPass();

View File

@ -15,8 +15,10 @@ $end_info$
#include "Interface/IR/IREmitter.h"
#include "Interface/IR/PassManager.h"
#include "Interface/Core/CPUID.h"
#include <FEXCore/IR/IR.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/fextl/map.h>
@ -73,9 +75,10 @@ static bool IsBfeAlreadyDone(IREmitter* IREmit, OrderedNodeWrapper src, uint64_t
class ConstProp final : public FEXCore::IR::Pass {
public:
explicit ConstProp(bool DoInlineConstants, bool SupportsTSOImm9)
explicit ConstProp(bool DoInlineConstants, bool SupportsTSOImm9, const FEXCore::CPUIDEmu* CPUID)
: InlineConstants(DoInlineConstants)
, SupportsTSOImm9 {SupportsTSOImm9} {}
, SupportsTSOImm9 {SupportsTSOImm9}
, CPUID {CPUID} {}
void Run(IREmitter* IREmit) override;
@ -104,6 +107,7 @@ private:
return Result.first->second;
}
bool SupportsTSOImm9 {};
const FEXCore::CPUIDEmu* CPUID;
// This is a heuristic to limit constant pool live ranges to reduce RA interference pressure.
// If the range is unbounded then RA interference pressure seems to increase to the point
// that long blocks of constant usage can slow to a crawl.
@ -508,6 +512,89 @@ void ConstProp::ConstantPropagation(IREmitter* IREmit, const IRListView& Current
}
break;
}
case OP_SYSCALL: {
auto Op = IROp->CW<IR::IROp_Syscall>();
// Is the first argument a constant?
uint64_t Constant;
if (IREmit->IsValueConstant(Op->SyscallID, &Constant)) {
auto SyscallDef = Manager->SyscallHandler->GetSyscallABI(Constant);
auto SyscallFlags = Manager->SyscallHandler->GetSyscallFlags(Constant);
// Update the syscall flags
Op->Flags = SyscallFlags;
// XXX: Once we have the ability to do real function calls then we can call directly in to the syscall handler
if (SyscallDef.NumArgs < FEXCore::HLE::SyscallArguments::MAX_ARGS) {
// If the number of args are less than what the IR op supports then we can remove arg usage
// We need +1 since we are still passing in syscall number here
for (uint8_t Arg = (SyscallDef.NumArgs + 1); Arg < FEXCore::HLE::SyscallArguments::MAX_ARGS; ++Arg) {
IREmit->ReplaceNodeArgument(CodeNode, Arg, IREmit->Invalid());
}
// Replace syscall with inline passthrough syscall if we can
if (SyscallDef.HostSyscallNumber != -1) {
IREmit->SetWriteCursor(CodeNode);
// Skip Args[0] since that is the syscallid
auto InlineSyscall =
IREmit->_InlineSyscall(CurrentIR.GetNode(IROp->Args[1]), CurrentIR.GetNode(IROp->Args[2]), CurrentIR.GetNode(IROp->Args[3]),
CurrentIR.GetNode(IROp->Args[4]), CurrentIR.GetNode(IROp->Args[5]), CurrentIR.GetNode(IROp->Args[6]),
SyscallDef.HostSyscallNumber, Op->Flags);
// Replace all syscall uses with this inline one
IREmit->ReplaceAllUsesWith(CodeNode, InlineSyscall);
// We must remove here since DCE can't remove a IROp with sideeffects
IREmit->Remove(CodeNode);
}
}
}
break;
}
case OP_CPUID: {
auto Op = IROp->CW<IR::IROp_CPUID>();
uint64_t ConstantFunction {}, ConstantLeaf {};
bool IsConstantFunction = IREmit->IsValueConstant(Op->Function, &ConstantFunction);
bool IsConstantLeaf = IREmit->IsValueConstant(Op->Leaf, &ConstantLeaf);
// If the CPUID function is constant then we can try and optimize.
if (IsConstantFunction) { // && ConstantFunction != 1) {
// Check if it supports constant data reporting for this function.
const auto SupportsConstant = CPUID->DoesFunctionReportConstantData(ConstantFunction);
if (SupportsConstant.SupportsConstantFunction == CPUIDEmu::SupportsConstant::CONSTANT) {
// If the CPUID needs a constant leaf to be optimized then this can't work if we didn't const-prop the leaf register.
if (!(SupportsConstant.NeedsLeaf == CPUIDEmu::NeedsLeafConstant::NEEDSLEAFCONSTANT && !IsConstantLeaf)) {
// Calculate the constant data and replace all uses.
// DCE will remove the CPUID IR operation.
const auto ConstantCPUIDResult = CPUID->RunFunction(ConstantFunction, ConstantLeaf);
uint64_t ResultsLower = (static_cast<uint64_t>(ConstantCPUIDResult.ebx) << 32) | ConstantCPUIDResult.eax;
uint64_t ResultsUpper = (static_cast<uint64_t>(ConstantCPUIDResult.edx) << 32) | ConstantCPUIDResult.ecx;
IREmit->SetWriteCursor(CodeNode);
auto ElementPair = IREmit->_CreateElementPair(IR::OpSize::i128Bit, IREmit->_Constant(ResultsLower), IREmit->_Constant(ResultsUpper));
// Replace all CPUID uses with this inline one
IREmit->ReplaceAllUsesWith(CodeNode, ElementPair);
}
}
}
break;
}
case OP_XGETBV: {
auto Op = IROp->CW<IR::IROp_XGetBV>();
uint64_t ConstantFunction {};
if (IREmit->IsValueConstant(Op->Function, &ConstantFunction) && CPUID->DoesXCRFunctionReportConstantData(ConstantFunction)) {
const auto ConstantXCRResult = CPUID->RunXCRFunction(ConstantFunction);
IREmit->SetWriteCursor(CodeNode);
auto ElementPair =
IREmit->_CreateElementPair(IR::OpSize::i64Bit, IREmit->_Constant(ConstantXCRResult.eax), IREmit->_Constant(ConstantXCRResult.edx));
// Replace all xgetbv uses with this inline one
IREmit->ReplaceAllUsesWith(CodeNode, ElementPair);
}
break;
}
default: break;
}
}
@ -766,7 +853,7 @@ void ConstProp::Run(IREmitter* IREmit) {
}
}
fextl::unique_ptr<FEXCore::IR::Pass> CreateConstProp(bool InlineConstants, bool SupportsTSOImm9) {
return fextl::make_unique<ConstProp>(InlineConstants, SupportsTSOImm9);
fextl::unique_ptr<FEXCore::IR::Pass> CreateConstProp(bool InlineConstants, bool SupportsTSOImm9, const FEXCore::CPUIDEmu* CPUID) {
return fextl::make_unique<ConstProp>(InlineConstants, SupportsTSOImm9, CPUID);
}
} // namespace FEXCore::IR

View File

@ -1,121 +0,0 @@
// SPDX-License-Identifier: MIT
/*
$info$
tags: ir|opts
desc: Removes unused arguments if known syscall number
$end_info$
*/
#include "Interface/Core/CPUID.h"
#include "Interface/IR/IREmitter.h"
#include "Interface/IR/PassManager.h"
#include <FEXCore/IR/IR.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/Utils/Profiler.h>
#include <memory>
#include <stdint.h>
namespace FEXCore::IR {
/**
 * IR pass that rewrites syscall, CPUID and XGETBV operations whose selector
 * operand is a compile-time constant (see Run() for the individual rewrites).
 */
class InlineCallOptimization final : public FEXCore::IR::Pass {
public:
  /**
   * @param CPUID CPUID emulation object queried by Run() for constant
   *              CPUID/XCR results. Only the pointer is stored.
   *
   * Marked `explicit` so a bare `const CPUIDEmu*` can never be implicitly
   * converted into a pass object.
   */
  explicit InlineCallOptimization(const FEXCore::CPUIDEmu* CPUID)
    : CPUID {CPUID} {}

  /// Runs the optimization over the IR currently held by `IREmit`.
  void Run(IREmitter* IREmit) override;

private:
  // Raw pointer that this class never frees.
  // NOTE(review): presumably the pointee must outlive the pass - confirm against the caller.
  const FEXCore::CPUIDEmu* CPUID;
};
/**
 * Visits every op in the current IR and rewrites three kinds of operations
 * when their selector operand is a known constant:
 *
 *  - OP_SYSCALL: refreshes the op's flags from the syscall handler, replaces
 *    the argument slots past the syscall's declared argument count with the
 *    invalid node, and, when a host syscall number is available, swaps the op
 *    for an inline syscall op.
 *  - OP_CPUID:   replaces all uses with a 128-bit element pair holding the
 *    precomputed eax/ebx/ecx/edx values.
 *  - OP_XGETBV:  replaces all uses with a 64-bit element pair holding the
 *    precomputed eax/edx values.
 */
void InlineCallOptimization::Run(IREmitter* IREmit) {
  FEXCORE_PROFILE_SCOPED("PassManager::SyscallOpt");

  auto CurrentIR = IREmit->ViewIR();

  for (auto [CodeNode, IROp] : CurrentIR.GetAllCode()) {
    switch (IROp->Op) {
    case FEXCore::IR::OP_SYSCALL: {
      auto SyscallOp = IROp->CW<IR::IROp_Syscall>();

      // Nothing can be done unless the syscall number itself is a constant.
      uint64_t SyscallNumber;
      if (!IREmit->IsValueConstant(SyscallOp->SyscallID, &SyscallNumber)) {
        break;
      }

      auto ABI = Manager->SyscallHandler->GetSyscallABI(SyscallNumber);

      // The flags are refreshed for every constant syscall number, even if no further rewrite happens.
      SyscallOp->Flags = Manager->SyscallHandler->GetSyscallFlags(SyscallNumber);

      // XXX: Once real function calls are possible this could call straight into the syscall handler.
      if (ABI.NumArgs >= FEXCore::HLE::SyscallArguments::MAX_ARGS) {
        break;
      }

      // Drop the argument slots the syscall does not consume.
      // Slot 0 carries the syscall number, hence the +1 on the starting index.
      for (uint8_t Slot = (ABI.NumArgs + 1); Slot < FEXCore::HLE::SyscallArguments::MAX_ARGS; ++Slot) {
        IREmit->ReplaceNodeArgument(CodeNode, Slot, IREmit->Invalid());
      }

      // Without a host syscall number there is no passthrough form to emit.
      if (ABI.HostSyscallNumber == -1) {
        break;
      }

      IREmit->SetWriteCursor(CodeNode);

      // Args[0] is the syscall id, so the inline form takes Args[1] through Args[6].
      auto Passthrough = IREmit->_InlineSyscall(
        CurrentIR.GetNode(IROp->Args[1]), CurrentIR.GetNode(IROp->Args[2]), CurrentIR.GetNode(IROp->Args[3]),
        CurrentIR.GetNode(IROp->Args[4]), CurrentIR.GetNode(IROp->Args[5]), CurrentIR.GetNode(IROp->Args[6]),
        ABI.HostSyscallNumber, SyscallOp->Flags);

      IREmit->ReplaceAllUsesWith(CodeNode, Passthrough);

      // Explicit removal is required: DCE can't remove an IROp with side effects.
      IREmit->Remove(CodeNode);
      break;
    }

    case FEXCore::IR::OP_CPUID: {
      auto CPUIDOp = IROp->CW<IR::IROp_CPUID>();

      uint64_t FunctionValue {};
      uint64_t LeafValue {};
      const bool FunctionIsConstant = IREmit->IsValueConstant(CPUIDOp->Function, &FunctionValue);
      const bool LeafIsConstant = IREmit->IsValueConstant(CPUIDOp->Leaf, &LeafValue);

      // Only a constant CPUID function can be considered for folding.
      if (!FunctionIsConstant) { // && FunctionValue != 1) {
        break;
      }

      // The emulated CPUID must report constant data for this function.
      const auto Support = CPUID->DoesFunctionReportConstantData(FunctionValue);
      if (Support.SupportsConstantFunction != CPUIDEmu::SupportsConstant::CONSTANT) {
        break;
      }

      // A function that needs a constant leaf cannot be folded if the leaf register was not const-propagated.
      if (Support.NeedsLeaf == CPUIDEmu::NeedsLeafConstant::NEEDSLEAFCONSTANT && !LeafIsConstant) {
        break;
      }

      // Compute the result now and pack it as {ebx:eax, edx:ecx}.
      // The CPUID op itself is left in place for DCE to remove.
      const auto Regs = CPUID->RunFunction(FunctionValue, LeafValue);
      uint64_t PackedLow = (static_cast<uint64_t>(Regs.ebx) << 32) | Regs.eax;
      uint64_t PackedHigh = (static_cast<uint64_t>(Regs.edx) << 32) | Regs.ecx;

      IREmit->SetWriteCursor(CodeNode);
      auto FoldedPair = IREmit->_CreateElementPair(IR::OpSize::i128Bit, IREmit->_Constant(PackedLow), IREmit->_Constant(PackedHigh));

      IREmit->ReplaceAllUsesWith(CodeNode, FoldedPair);
      break;
    }

    case FEXCore::IR::OP_XGETBV: {
      auto XGetBVOp = IROp->CW<IR::IROp_XGetBV>();

      uint64_t FunctionValue {};
      if (!(IREmit->IsValueConstant(XGetBVOp->Function, &FunctionValue) && CPUID->DoesXCRFunctionReportConstantData(FunctionValue))) {
        break;
      }

      const auto Regs = CPUID->RunXCRFunction(FunctionValue);

      IREmit->SetWriteCursor(CodeNode);
      auto FoldedPair = IREmit->_CreateElementPair(IR::OpSize::i64Bit, IREmit->_Constant(Regs.eax), IREmit->_Constant(Regs.edx));

      // Point every xgetbv consumer at the precomputed pair.
      IREmit->ReplaceAllUsesWith(CodeNode, FoldedPair);
      break;
    }

    default: break;
    }
  }
}
/**
 * Factory for the inline-call optimization pass.
 *
 * @param CPUID CPUID emulation object forwarded to the pass constructor.
 * @return The newly created pass, owned through the generic Pass interface.
 */
fextl::unique_ptr<FEXCore::IR::Pass> CreateInlineCallOptimization(const FEXCore::CPUIDEmu* CPUID) {
  fextl::unique_ptr<FEXCore::IR::Pass> NewPass = fextl::make_unique<InlineCallOptimization>(CPUID);
  return NewPass;
}
} // namespace FEXCore::IR

View File

@ -112,7 +112,6 @@ IR to IR Optimization
- [DeadContextStoreElimination.cpp](../FEXCore/Source/Interface/IR/Passes/DeadContextStoreElimination.cpp): Transforms ContextLoad/Store to temporaries, similar to mem2reg
- [DeadStoreElimination.cpp](../FEXCore/Source/Interface/IR/Passes/DeadStoreElimination.cpp): Cross block store-after-store elimination
- [IRValidation.cpp](../FEXCore/Source/Interface/IR/Passes/IRValidation.cpp): Sanity checking pass
- [InlineCallOptimization.cpp](../FEXCore/Source/Interface/IR/Passes/InlineCallOptimization.cpp): Removes unused arguments if known syscall number
- [LongDivideRemovalPass.cpp](../FEXCore/Source/Interface/IR/Passes/LongDivideRemovalPass.cpp): Long divide elimination pass
- [RedundantFlagCalculationElimination.cpp](../FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp): This is not used right now, possibly broken
- [RegisterAllocationPass.cpp](../FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp)