mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-01-22 14:26:51 +00:00
Reapply "IR: drop RCLSE"
This reverts commit 78aee4d96e39c9ef6415a7dca21fd6b81dabe12e.
This commit is contained in:
parent
8dae4bcd44
commit
3a334c4585
@ -138,7 +138,6 @@ set (SRCS
|
||||
Interface/IR/IREmitter.cpp
|
||||
Interface/IR/PassManager.cpp
|
||||
Interface/IR/Passes/ConstProp.cpp
|
||||
Interface/IR/Passes/DeadContextStoreElimination.cpp
|
||||
Interface/IR/Passes/IRDumperPass.cpp
|
||||
Interface/IR/Passes/IRValidation.cpp
|
||||
Interface/IR/Passes/RAValidation.cpp
|
||||
|
@ -625,7 +625,7 @@ DEF_OP(ShiftFlags) {
|
||||
|
||||
// Set the output outside the branch to avoid needing an extra leg of the
|
||||
// branch. We specifically do not hardcode the PF register anywhere (relying
|
||||
// on a tied SRA register instead) to avoid fighting with RA/RCLSE.
|
||||
// on a tied SRA register instead) to avoid fighting with RA.
|
||||
if (PFTemp != PFInput) {
|
||||
mov(ARMEmitter::Size::i64Bit, PFTemp, PFInput);
|
||||
}
|
||||
|
@ -70,7 +70,6 @@ void PassManager::AddDefaultPasses(FEXCore::Context::ContextImpl* ctx) {
|
||||
FEX_CONFIG_OPT(DisablePasses, O0);
|
||||
|
||||
if (!DisablePasses()) {
|
||||
InsertPass(CreateContextLoadStoreElimination(ctx->HostFeatures.SupportsAVX && ctx->HostFeatures.SupportsSVE256));
|
||||
InsertPass(CreateDeadStoreElimination());
|
||||
InsertPass(CreateConstProp(ctx->HostFeatures.SupportsTSOImm9, &ctx->CPUID));
|
||||
InsertPass(CreateDeadFlagCalculationEliminination());
|
||||
|
@ -17,7 +17,6 @@ class RegisterAllocationPass;
|
||||
class RegisterAllocationData;
|
||||
|
||||
fextl::unique_ptr<FEXCore::IR::Pass> CreateConstProp(bool SupportsTSOImm9, const FEXCore::CPUIDEmu* CPUID);
|
||||
fextl::unique_ptr<FEXCore::IR::Pass> CreateContextLoadStoreElimination(bool SupportsSVE256);
|
||||
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadFlagCalculationEliminination();
|
||||
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadStoreElimination();
|
||||
fextl::unique_ptr<FEXCore::IR::RegisterAllocationPass> CreateRegisterAllocationPass();
|
||||
|
@ -1,772 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
$info$
|
||||
tags: ir|opts
|
||||
desc: Transforms ContextLoad/Store to temporaries, similar to mem2reg
|
||||
$end_info$
|
||||
*/
|
||||
|
||||
#include "Interface/IR/IR.h"
|
||||
#include "Interface/IR/IREmitter.h"
|
||||
#include "Interface/IR/Passes.h"
|
||||
#include "Interface/IR/PassManager.h"
|
||||
|
||||
#include <FEXCore/Core/CoreState.h>
|
||||
#include <FEXCore/Core/X86Enums.h>
|
||||
#include <FEXCore/IR/IR.h>
|
||||
#include <FEXCore/Utils/EnumOperators.h>
|
||||
#include <FEXCore/Utils/LogManager.h>
|
||||
#include <FEXCore/Utils/Profiler.h>
|
||||
#include <FEXCore/fextl/unordered_map.h>
|
||||
#include <FEXCore/fextl/vector.h>
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
namespace {
|
||||
struct ContextMemberClassification {
|
||||
size_t Offset;
|
||||
uint16_t Size;
|
||||
};
|
||||
|
||||
enum class LastAccessType {
|
||||
NONE = (0b000 << 0), ///< Was never previously accessed
|
||||
WRITE = (0b001 << 0), ///< Was fully overwritten
|
||||
READ = (0b010 << 0), ///< Was fully read
|
||||
INVALID = (0b011 << 0), ///< Accessing this is invalid
|
||||
MASK = (0b011 << 0),
|
||||
PARTIAL = (0b100 << 0),
|
||||
PARTIAL_WRITE = (PARTIAL | WRITE), ///< Was partially written
|
||||
PARTIAL_READ = (PARTIAL | READ), ///< Was partially read
|
||||
};
|
||||
FEX_DEF_NUM_OPS(LastAccessType);
|
||||
|
||||
static bool IsWriteAccess(LastAccessType Type) {
|
||||
return (Type & LastAccessType::MASK) == LastAccessType::WRITE;
|
||||
}
|
||||
|
||||
static bool IsReadAccess(LastAccessType Type) {
|
||||
return (Type & LastAccessType::MASK) == LastAccessType::READ;
|
||||
}
|
||||
|
||||
[[maybe_unused]]
|
||||
static bool IsInvalidAccess(LastAccessType Type) {
|
||||
return (Type & LastAccessType::MASK) == LastAccessType::INVALID;
|
||||
}
|
||||
|
||||
[[maybe_unused]]
|
||||
static bool IsPartialAccess(LastAccessType Type) {
|
||||
return (Type & LastAccessType::PARTIAL) == LastAccessType::PARTIAL;
|
||||
}
|
||||
|
||||
[[maybe_unused]]
|
||||
static bool IsFullAccess(LastAccessType Type) {
|
||||
return (Type & LastAccessType::PARTIAL) == LastAccessType::NONE;
|
||||
}
|
||||
|
||||
struct ContextMemberInfo {
|
||||
ContextMemberClassification Class;
|
||||
LastAccessType Accessed;
|
||||
FEXCore::IR::RegisterClassType AccessRegClass;
|
||||
uint32_t AccessOffset;
|
||||
uint8_t AccessSize;
|
||||
///< The last value that was loaded or stored.
|
||||
FEXCore::IR::Ref ValueNode;
|
||||
///< With a store access, the store node that is doing the operation.
|
||||
FEXCore::IR::Ref StoreNode;
|
||||
};
|
||||
|
||||
struct ContextInfo {
|
||||
fextl::vector<ContextMemberInfo*> Lookup;
|
||||
fextl::vector<ContextMemberInfo> ClassificationInfo;
|
||||
};
|
||||
|
||||
static void ClassifyContextStruct(ContextInfo* ContextClassificationInfo, bool SupportsAVX256) {
|
||||
auto ContextClassification = &ContextClassificationInfo->ClassificationInfo;
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, InlineJITBlockHeader),
|
||||
sizeof(FEXCore::Core::CPUState::InlineJITBlockHeader),
|
||||
},
|
||||
LastAccessType::INVALID,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
// DeferredSignalRefCount
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount),
|
||||
sizeof(FEXCore::Core::CPUState::DeferredSignalRefCount),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, avx_high[0][0]) + FEXCore::Core::CPUState::XMM_SSE_REG_SIZE * i,
|
||||
FEXCore::Core::CPUState::XMM_SSE_REG_SIZE,
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
}
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, rip),
|
||||
sizeof(FEXCore::Core::CPUState::rip),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_GPRS; ++i) {
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, gregs[0]) + sizeof(FEXCore::Core::CPUState::gregs[0]) * i,
|
||||
FEXCore::Core::CPUState::GPR_REG_SIZE,
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
}
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, _pad),
|
||||
sizeof(FEXCore::Core::CPUState::_pad),
|
||||
},
|
||||
LastAccessType::INVALID,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
static_assert(offsetof(FEXCore::Core::CPUState, xmm.avx.data[0][0]) == 416, "What");
|
||||
static_assert(FEXCore::Core::CPUState::XMM_AVX_REG_SIZE == 32, "What");
|
||||
if (SupportsAVX256) {
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, xmm.avx.data[0][0]) + FEXCore::Core::CPUState::XMM_AVX_REG_SIZE * i,
|
||||
FEXCore::Core::CPUState::XMM_AVX_REG_SIZE,
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, xmm.sse.data[0][0]) + FEXCore::Core::CPUState::XMM_SSE_REG_SIZE * i,
|
||||
FEXCore::Core::CPUState::XMM_SSE_REG_SIZE,
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
}
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, xmm.sse.pad[0][0]),
|
||||
static_cast<uint16_t>(FEXCore::Core::CPUState::XMM_SSE_REG_SIZE * FEXCore::Core::CPUState::NUM_XMMS),
|
||||
},
|
||||
LastAccessType::INVALID,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
}
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, es_idx),
|
||||
sizeof(FEXCore::Core::CPUState::es_idx),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, cs_idx),
|
||||
sizeof(FEXCore::Core::CPUState::cs_idx),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, ss_idx),
|
||||
sizeof(FEXCore::Core::CPUState::ss_idx),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, ds_idx),
|
||||
sizeof(FEXCore::Core::CPUState::ds_idx),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, gs_idx),
|
||||
sizeof(FEXCore::Core::CPUState::gs_idx),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, fs_idx),
|
||||
sizeof(FEXCore::Core::CPUState::fs_idx),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, _pad2),
|
||||
sizeof(FEXCore::Core::CPUState::_pad2),
|
||||
},
|
||||
LastAccessType::INVALID,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, es_cached),
|
||||
sizeof(FEXCore::Core::CPUState::es_cached),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, cs_cached),
|
||||
sizeof(FEXCore::Core::CPUState::cs_cached),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, ss_cached),
|
||||
sizeof(FEXCore::Core::CPUState::ss_cached),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, ds_cached),
|
||||
sizeof(FEXCore::Core::CPUState::ds_cached),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, gs_cached),
|
||||
sizeof(FEXCore::Core::CPUState::gs_cached),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, fs_cached),
|
||||
sizeof(FEXCore::Core::CPUState::fs_cached),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_FLAGS; ++i) {
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, flags[0]) + sizeof(FEXCore::Core::CPUState::flags[0]) * i,
|
||||
FEXCore::Core::CPUState::FLAG_SIZE,
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
}
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, pf_raw),
|
||||
sizeof(FEXCore::Core::CPUState::pf_raw),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, af_raw),
|
||||
sizeof(FEXCore::Core::CPUState::af_raw),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) {
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {offsetof(FEXCore::Core::CPUState, mm[0][0]) + sizeof(FEXCore::Core::CPUState::mm[0]) * i,
|
||||
FEXCore::Core::CPUState::MM_REG_SIZE},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
}
|
||||
|
||||
// GDTs
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_GDTS; ++i) {
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, gdt[0]) + sizeof(FEXCore::Core::CPUState::gdt[0]) * i,
|
||||
sizeof(FEXCore::Core::CPUState::gdt[0]),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
}
|
||||
|
||||
// FCW
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, FCW),
|
||||
sizeof(FEXCore::Core::CPUState::FCW),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
// AbridgedFTW
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, AbridgedFTW),
|
||||
sizeof(FEXCore::Core::CPUState::AbridgedFTW),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
// _pad3
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, _pad3),
|
||||
sizeof(FEXCore::Core::CPUState::_pad3),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
[[maybe_unused]] size_t ClassifiedStructSize {};
|
||||
ContextClassificationInfo->Lookup.reserve(sizeof(FEXCore::Core::CPUState));
|
||||
for (auto& it : *ContextClassification) {
|
||||
LOGMAN_THROW_A_FMT(it.Class.Offset == ContextClassificationInfo->Lookup.size(), "Offset mismatch (offset={})", it.Class.Offset);
|
||||
for (int i = 0; i < it.Class.Size; i++) {
|
||||
ContextClassificationInfo->Lookup.push_back(&it);
|
||||
}
|
||||
ClassifiedStructSize += it.Class.Size;
|
||||
}
|
||||
|
||||
LOGMAN_THROW_AA_FMT(ClassifiedStructSize == sizeof(FEXCore::Core::CPUState),
|
||||
"Classified CPUStruct size doesn't match real CPUState struct size! {} (classified) != {} (real)",
|
||||
ClassifiedStructSize, sizeof(FEXCore::Core::CPUState));
|
||||
|
||||
LOGMAN_THROW_A_FMT(ContextClassificationInfo->Lookup.size() == sizeof(FEXCore::Core::CPUState),
|
||||
"Classified lookup size doesn't match real CPUState struct size! {} (classified) != {} (real)",
|
||||
ContextClassificationInfo->Lookup.size(), sizeof(FEXCore::Core::CPUState));
|
||||
}
|
||||
|
||||
static void ResetClassificationAccesses(ContextInfo* ContextClassificationInfo, bool SupportsAVX256) {
|
||||
auto ContextClassification = &ContextClassificationInfo->ClassificationInfo;
|
||||
|
||||
auto SetAccess = [&](size_t Offset, LastAccessType Access) {
|
||||
ContextClassification->at(Offset).Accessed = Access;
|
||||
ContextClassification->at(Offset).AccessRegClass = FEXCore::IR::InvalidClass;
|
||||
ContextClassification->at(Offset).AccessOffset = 0;
|
||||
ContextClassification->at(Offset).StoreNode = nullptr;
|
||||
};
|
||||
|
||||
size_t Offset = 0;
|
||||
|
||||
///< InlineJITBlockHeader
|
||||
SetAccess(Offset++, LastAccessType::INVALID);
|
||||
|
||||
// DeferredSignalRefCount
|
||||
SetAccess(Offset++, LastAccessType::INVALID);
|
||||
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
|
||||
///< avx_high
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
}
|
||||
|
||||
// rip
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
|
||||
///< gregs
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_GPRS; ++i) {
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
}
|
||||
|
||||
// pad
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
|
||||
// xmm
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
}
|
||||
|
||||
// xmm_pad
|
||||
if (!SupportsAVX256) {
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
}
|
||||
|
||||
// Segment indexes
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
|
||||
// Pad2
|
||||
SetAccess(Offset++, LastAccessType::INVALID);
|
||||
|
||||
// Segments
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
|
||||
///< flags
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_FLAGS; ++i) {
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
}
|
||||
|
||||
///< pf_raw
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
|
||||
///< af_raw
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
|
||||
///< mm
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) {
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
}
|
||||
|
||||
///< gdt
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_GDTS; ++i) {
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
}
|
||||
|
||||
///< FCW
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
|
||||
///< AbridgedFTW
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
|
||||
// pad3
|
||||
SetAccess(Offset++, LastAccessType::INVALID);
|
||||
}
|
||||
|
||||
struct BlockInfo {
|
||||
fextl::vector<FEXCore::IR::Ref> Predecessors;
|
||||
fextl::vector<FEXCore::IR::Ref> Successors;
|
||||
ContextInfo IncomingClassifiedStruct;
|
||||
ContextInfo OutgoingClassifiedStruct;
|
||||
};
|
||||
|
||||
class RCLSE final : public FEXCore::IR::Pass {
|
||||
public:
|
||||
explicit RCLSE(bool SupportsAVX256)
|
||||
: SupportsAVX256 {SupportsAVX256} {
|
||||
ClassifyContextStruct(&ClassifiedStruct, SupportsAVX256);
|
||||
}
|
||||
void Run(FEXCore::IR::IREmitter* IREmit) override;
|
||||
private:
|
||||
ContextInfo ClassifiedStruct;
|
||||
fextl::unordered_map<FEXCore::IR::NodeID, BlockInfo> OffsetToBlockMap;
|
||||
|
||||
bool SupportsAVX256;
|
||||
|
||||
ContextMemberInfo* FindMemberInfo(ContextInfo* ClassifiedInfo, uint32_t Offset, uint8_t Size);
|
||||
ContextMemberInfo* RecordAccess(ContextMemberInfo* Info, FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size,
|
||||
LastAccessType AccessType, FEXCore::IR::Ref Node, FEXCore::IR::Ref StoreNode = nullptr);
|
||||
ContextMemberInfo* RecordAccess(ContextInfo* ClassifiedInfo, FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size,
|
||||
LastAccessType AccessType, FEXCore::IR::Ref Node, FEXCore::IR::Ref StoreNode = nullptr);
|
||||
|
||||
void HandleLoadFlag(FEXCore::IR::IREmitter* IREmit, ContextInfo* LocalInfo, FEXCore::IR::Ref CodeNode, unsigned Flag);
|
||||
|
||||
// Classify context loads and stores.
|
||||
void ClassifyContextLoad(FEXCore::IR::IREmitter* IREmit, ContextInfo* LocalInfo, FEXCore::IR::RegisterClassType Class, uint32_t Offset,
|
||||
uint8_t Size, FEXCore::IR::Ref CodeNode, FEXCore::IR::NodeIterator BlockEnd);
|
||||
void ClassifyContextStore(FEXCore::IR::IREmitter* IREmit, ContextInfo* LocalInfo, FEXCore::IR::RegisterClassType Class, uint32_t Offset,
|
||||
uint8_t Size, FEXCore::IR::Ref CodeNode, FEXCore::IR::Ref ValueNode);
|
||||
|
||||
// Block local Passes
|
||||
void RedundantStoreLoadElimination(FEXCore::IR::IREmitter* IREmit);
|
||||
|
||||
unsigned OffsetForReg(FEXCore::IR::RegisterClassType Class, unsigned Reg, unsigned Size) {
|
||||
if (Class == FEXCore::IR::FPRClass) {
|
||||
return Size == 32 ? offsetof(FEXCore::Core::CPUState, xmm.avx.data[Reg][0]) : offsetof(FEXCore::Core::CPUState, xmm.sse.data[Reg][0]);
|
||||
} else if (Reg == FEXCore::Core::CPUState::PF_AS_GREG) {
|
||||
return offsetof(FEXCore::Core::CPUState, pf_raw);
|
||||
} else if (Reg == FEXCore::Core::CPUState::AF_AS_GREG) {
|
||||
return offsetof(FEXCore::Core::CPUState, af_raw);
|
||||
} else {
|
||||
return offsetof(FEXCore::Core::CPUState, gregs[Reg]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
ContextMemberInfo* RCLSE::FindMemberInfo(ContextInfo* ContextClassificationInfo, uint32_t Offset, uint8_t Size) {
|
||||
return ContextClassificationInfo->Lookup.at(Offset);
|
||||
}
|
||||
|
||||
ContextMemberInfo* RCLSE::RecordAccess(ContextMemberInfo* Info, FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size,
|
||||
LastAccessType AccessType, FEXCore::IR::Ref ValueNode, FEXCore::IR::Ref StoreNode) {
|
||||
LOGMAN_THROW_AA_FMT((Offset + Size) <= (Info->Class.Offset + Info->Class.Size), "Access to context item went over member size");
|
||||
LOGMAN_THROW_AA_FMT(Info->Accessed != LastAccessType::INVALID, "Tried to access invalid member");
|
||||
|
||||
// If we aren't fully overwriting the member then it is a partial write that we need to track
|
||||
if (Size < Info->Class.Size) {
|
||||
AccessType = AccessType == LastAccessType::WRITE ? LastAccessType::PARTIAL_WRITE : LastAccessType::PARTIAL_READ;
|
||||
}
|
||||
if (Size > Info->Class.Size) {
|
||||
LOGMAN_MSG_A_FMT("Can't handle this");
|
||||
}
|
||||
|
||||
Info->Accessed = AccessType;
|
||||
Info->AccessRegClass = RegClass;
|
||||
Info->AccessOffset = Offset;
|
||||
Info->AccessSize = Size;
|
||||
Info->ValueNode = ValueNode;
|
||||
if (StoreNode != nullptr) {
|
||||
Info->StoreNode = StoreNode;
|
||||
}
|
||||
return Info;
|
||||
}
|
||||
|
||||
ContextMemberInfo* RCLSE::RecordAccess(ContextInfo* ClassifiedInfo, FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size,
|
||||
LastAccessType AccessType, FEXCore::IR::Ref ValueNode, FEXCore::IR::Ref StoreNode) {
|
||||
ContextMemberInfo* Info = FindMemberInfo(ClassifiedInfo, Offset, Size);
|
||||
return RecordAccess(Info, RegClass, Offset, Size, AccessType, ValueNode, StoreNode);
|
||||
}
|
||||
|
||||
void RCLSE::ClassifyContextLoad(FEXCore::IR::IREmitter* IREmit, ContextInfo* LocalInfo, FEXCore::IR::RegisterClassType Class,
|
||||
uint32_t Offset, uint8_t Size, FEXCore::IR::Ref CodeNode, FEXCore::IR::NodeIterator BlockEnd) {
|
||||
auto Info = FindMemberInfo(LocalInfo, Offset, Size);
|
||||
ContextMemberInfo PreviousMemberInfoCopy = *Info;
|
||||
RecordAccess(Info, Class, Offset, Size, LastAccessType::READ, CodeNode);
|
||||
|
||||
if (PreviousMemberInfoCopy.AccessRegClass == Info->AccessRegClass && PreviousMemberInfoCopy.AccessOffset == Info->AccessOffset &&
|
||||
PreviousMemberInfoCopy.AccessSize == Size) {
|
||||
// This optimizes two cases:
|
||||
// - Previous access was a load, and we have a redundant load of the same value.
|
||||
// - Previous access was a store, and we are redundantly loading immediately after the store. Eliminating the store.
|
||||
IREmit->ReplaceAllUsesWithRange(CodeNode, PreviousMemberInfoCopy.ValueNode, IREmit->GetIterator(IREmit->WrapNode(CodeNode)), BlockEnd);
|
||||
RecordAccess(Info, Class, Offset, Size, LastAccessType::READ, PreviousMemberInfoCopy.ValueNode);
|
||||
}
|
||||
// TODO: Optimize the case of partial loads.
|
||||
}
|
||||
|
||||
void RCLSE::ClassifyContextStore(FEXCore::IR::IREmitter* IREmit, ContextInfo* LocalInfo, FEXCore::IR::RegisterClassType Class,
|
||||
uint32_t Offset, uint8_t Size, FEXCore::IR::Ref CodeNode, FEXCore::IR::Ref ValueNode) {
|
||||
auto Info = FindMemberInfo(LocalInfo, Offset, Size);
|
||||
ContextMemberInfo PreviousMemberInfoCopy = *Info;
|
||||
RecordAccess(Info, Class, Offset, Size, LastAccessType::WRITE, ValueNode, CodeNode);
|
||||
|
||||
if (PreviousMemberInfoCopy.AccessRegClass == Info->AccessRegClass && PreviousMemberInfoCopy.AccessOffset == Info->AccessOffset &&
|
||||
PreviousMemberInfoCopy.AccessSize == Size && PreviousMemberInfoCopy.Accessed == LastAccessType::WRITE) {
|
||||
// This optimizes redundant stores with no intervening load
|
||||
// TODO: this is causing RA to fall over in some titles, disabling for now.
|
||||
// Revisit when the new RA lands.
|
||||
#if 0
|
||||
IREmit->Remove(PreviousMemberInfoCopy.StoreNode);
|
||||
#endif
|
||||
}
|
||||
|
||||
// TODO: Optimize the case of partial stores.
|
||||
}
|
||||
|
||||
void RCLSE::HandleLoadFlag(FEXCore::IR::IREmitter* IREmit, ContextInfo* LocalInfo, FEXCore::IR::Ref CodeNode, unsigned Flag) {
|
||||
const auto FlagOffset = offsetof(FEXCore::Core::CPUState, flags[Flag]);
|
||||
auto Info = FindMemberInfo(LocalInfo, FlagOffset, 1);
|
||||
LastAccessType LastAccess = Info->Accessed;
|
||||
auto LastValueNode = Info->ValueNode;
|
||||
|
||||
if (IsWriteAccess(LastAccess)) { // 1 byte so always a full write
|
||||
// If the last store matches this load value then we can replace the loaded value with the previous valid one
|
||||
IREmit->SetWriteCursor(CodeNode);
|
||||
IREmit->ReplaceAllUsesWith(CodeNode, LastValueNode);
|
||||
RecordAccess(Info, FEXCore::IR::GPRClass, FlagOffset, 1, LastAccessType::READ, LastValueNode);
|
||||
} else if (IsReadAccess(LastAccess)) {
|
||||
IREmit->ReplaceAllUsesWith(CodeNode, LastValueNode);
|
||||
RecordAccess(Info, FEXCore::IR::GPRClass, FlagOffset, 1, LastAccessType::READ, LastValueNode);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief This pass removes redundant pairs of storecontext and loadcontext ops
|
||||
*
|
||||
* eg.
|
||||
* %26 i128 = LoadMem %25 i64, 0x10
|
||||
* (%%27) StoreContext %26 i128, 0x10, 0xb0
|
||||
* %28 i128 = LoadContext 0x10, 0x90
|
||||
* %29 i128 = LoadContext 0x10, 0xb0
|
||||
* Converts to
|
||||
* %26 i128 = LoadMem %25 i64, 0x10
|
||||
* (%%27) StoreContext %26 i128, 0x10, 0xb0
|
||||
* %28 i128 = LoadContext 0x10, 0x90
|
||||
*
|
||||
* eg.
|
||||
* %6 i128 = LoadContext 0x10, 0x90
|
||||
* %7 i128 = LoadContext 0x10, 0x90
|
||||
* %8 i128 = VXor %7 i128, %6 i128
|
||||
* Converts to
|
||||
* %6 i128 = LoadContext 0x10, 0x90
|
||||
* %7 i128 = VXor %6 i128, %6 i128
|
||||
*
|
||||
* eg.
|
||||
* (%%189) StoreContext %188 i128, 0x10, 0xa0
|
||||
* %190 i128 = LoadContext 0x10, 0x90
|
||||
* %192 i128 = VAdd %188 i128, %190 i128, 0x10, 0x4
|
||||
* (%%193) StoreContext %192 i128, 0x10, 0xa0
|
||||
* Converts to
|
||||
* %173 i128 = LoadContext 0x10, 0x90
|
||||
* %175 i128 = VAdd %172 i128, %173 i128, 0x10, 0x4
|
||||
* (%%176) StoreContext %175 i128, 0x10, 0xa0
|
||||
|
||||
*/
|
||||
void RCLSE::RedundantStoreLoadElimination(FEXCore::IR::IREmitter* IREmit) {
|
||||
using namespace FEXCore;
|
||||
using namespace FEXCore::IR;
|
||||
|
||||
auto CurrentIR = IREmit->ViewIR();
|
||||
auto OriginalWriteCursor = IREmit->GetWriteCursor();
|
||||
|
||||
// XXX: Walk the list and calculate the control flow
|
||||
|
||||
ContextInfo& LocalInfo = ClassifiedStruct;
|
||||
|
||||
for (auto [BlockNode, BlockHeader] : CurrentIR.GetBlocks()) {
|
||||
auto BlockOp = BlockHeader->CW<FEXCore::IR::IROp_CodeBlock>();
|
||||
auto BlockEnd = IREmit->GetIterator(BlockOp->Last);
|
||||
|
||||
ResetClassificationAccesses(&LocalInfo, SupportsAVX256);
|
||||
|
||||
for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) {
|
||||
if (IROp->Op == OP_STORECONTEXT) {
|
||||
auto Op = IROp->CW<IR::IROp_StoreContext>();
|
||||
ClassifyContextStore(IREmit, &LocalInfo, Op->Class, Op->Offset, IROp->Size, CodeNode, CurrentIR.GetNode(Op->Value));
|
||||
} else if (IROp->Op == OP_STOREREGISTER) {
|
||||
auto Op = IROp->CW<IR::IROp_StoreRegister>();
|
||||
auto Offset = OffsetForReg(Op->Class, Op->Reg, IROp->Size);
|
||||
|
||||
ClassifyContextStore(IREmit, &LocalInfo, Op->Class, Offset, IROp->Size, CodeNode, CurrentIR.GetNode(Op->Value));
|
||||
} else if (IROp->Op == OP_LOADREGISTER) {
|
||||
auto Op = IROp->CW<IR::IROp_LoadRegister>();
|
||||
auto Offset = OffsetForReg(Op->Class, Op->Reg, IROp->Size);
|
||||
ClassifyContextLoad(IREmit, &LocalInfo, Op->Class, Offset, IROp->Size, CodeNode, BlockEnd);
|
||||
} else if (IROp->Op == OP_LOADCONTEXT) {
|
||||
auto Op = IROp->CW<IR::IROp_LoadContext>();
|
||||
ClassifyContextLoad(IREmit, &LocalInfo, Op->Class, Op->Offset, IROp->Size, CodeNode, BlockEnd);
|
||||
} else if (IROp->Op == OP_STOREFLAG) {
|
||||
const auto Op = IROp->CW<IR::IROp_StoreFlag>();
|
||||
const auto FlagOffset = offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag;
|
||||
auto Info = FindMemberInfo(&LocalInfo, FlagOffset, 1);
|
||||
auto LastStoreNode = Info->StoreNode;
|
||||
RecordAccess(&LocalInfo, FEXCore::IR::GPRClass, FlagOffset, 1, LastAccessType::WRITE, CurrentIR.GetNode(Op->Header.Args[0]), CodeNode);
|
||||
|
||||
// Flags don't alias, so we can take the simple route here. Kill any flags that have been overwritten
|
||||
if (LastStoreNode != nullptr) {
|
||||
IREmit->Remove(LastStoreNode);
|
||||
}
|
||||
} else if (IROp->Op == OP_INVALIDATEFLAGS) {
|
||||
auto Op = IROp->CW<IR::IROp_InvalidateFlags>();
|
||||
|
||||
// Loop through non-reserved flag stores and eliminate unused ones.
|
||||
for (size_t F = 0; F < Core::CPUState::NUM_EFLAG_BITS; F++) {
|
||||
if (!(Op->Flags & (1ULL << F))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto FlagOffset = offsetof(FEXCore::Core::CPUState, flags[0]) + F;
|
||||
auto Info = FindMemberInfo(&LocalInfo, FlagOffset, 1);
|
||||
auto LastStoreNode = Info->StoreNode;
|
||||
|
||||
// Flags don't alias, so we can take the simple route here. Kill any flags that have been invalidated without a read.
|
||||
if (LastStoreNode != nullptr) {
|
||||
IREmit->SetWriteCursor(CodeNode);
|
||||
RecordAccess(&LocalInfo, FEXCore::IR::GPRClass, FlagOffset, 1, LastAccessType::WRITE, IREmit->_Constant(0), CodeNode);
|
||||
|
||||
IREmit->Remove(LastStoreNode);
|
||||
}
|
||||
}
|
||||
} else if (IROp->Op == OP_LOADFLAG) {
|
||||
const auto Op = IROp->CW<IR::IROp_LoadFlag>();
|
||||
|
||||
HandleLoadFlag(IREmit, &LocalInfo, CodeNode, Op->Flag);
|
||||
} else if (IROp->Op == OP_LOADDF) {
|
||||
HandleLoadFlag(IREmit, &LocalInfo, CodeNode, X86State::RFLAG_DF_RAW_LOC);
|
||||
} else if (IROp->Op == OP_SYSCALL || IROp->Op == OP_INLINESYSCALL) {
|
||||
FEXCore::IR::SyscallFlags Flags {};
|
||||
if (IROp->Op == OP_SYSCALL) {
|
||||
auto Op = IROp->C<IR::IROp_Syscall>();
|
||||
Flags = Op->Flags;
|
||||
} else {
|
||||
auto Op = IROp->C<IR::IROp_InlineSyscall>();
|
||||
Flags = Op->Flags;
|
||||
}
|
||||
|
||||
if ((Flags & FEXCore::IR::SyscallFlags::OPTIMIZETHROUGH) != FEXCore::IR::SyscallFlags::OPTIMIZETHROUGH) {
|
||||
// We can't track through these
|
||||
ResetClassificationAccesses(&LocalInfo, SupportsAVX256);
|
||||
}
|
||||
} else if (IROp->Op == OP_STORECONTEXTINDEXED || IROp->Op == OP_LOADCONTEXTINDEXED || IROp->Op == OP_BREAK) {
|
||||
// We can't track through these
|
||||
ResetClassificationAccesses(&LocalInfo, SupportsAVX256);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
IREmit->SetWriteCursor(OriginalWriteCursor);
|
||||
}
|
||||
|
||||
void RCLSE::Run(FEXCore::IR::IREmitter* IREmit) {
|
||||
FEXCORE_PROFILE_SCOPED("PassManager::RCLSE");
|
||||
RedundantStoreLoadElimination(IREmit);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace FEXCore::IR {
|
||||
|
||||
fextl::unique_ptr<FEXCore::IR::Pass> CreateContextLoadStoreElimination(bool SupportsAVX256) {
|
||||
return fextl::make_unique<RCLSE>(SupportsAVX256);
|
||||
}
|
||||
|
||||
} // namespace FEXCore::IR
|
@ -110,7 +110,6 @@ IR to IR Optimization
|
||||
- [PassManager.cpp](../FEXCore/Source/Interface/IR/PassManager.cpp): Defines which passes are run, and runs them
|
||||
- [PassManager.h](../FEXCore/Source/Interface/IR/PassManager.h)
|
||||
- [ConstProp.cpp](../FEXCore/Source/Interface/IR/Passes/ConstProp.cpp): ConstProp, ZExt elim, const pooling, fcmp reduction, const inlining
|
||||
- [DeadContextStoreElimination.cpp](../FEXCore/Source/Interface/IR/Passes/DeadContextStoreElimination.cpp): Transforms ContextLoad/Store to temporaries, similar to mem2reg
|
||||
- [DeadStoreElimination.cpp](../FEXCore/Source/Interface/IR/Passes/DeadStoreElimination.cpp): Cross block store-after-store elimination
|
||||
- [IRValidation.cpp](../FEXCore/Source/Interface/IR/Passes/IRValidation.cpp): Sanity checking pass
|
||||
- [RedundantFlagCalculationElimination.cpp](../FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp): This is not used right now, possibly broken
|
||||
|
Loading…
x
Reference in New Issue
Block a user