mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-02-13 11:13:38 +00:00
OpCodeDispatcher: Optimize a case of GOT calculation
32-bit GOT calculation needs to do a call+pop to get the EIP. LEA doesn't work because there are no EIP-relative ops on 32-bit like there are on x86-64. This causes a terrible block split on every GOT calculation without the optimization in place. Now the block can continue through this weird GOT calculation. This will be worthwhile for our 32-bit thunks where for some reason the GOT calculation can't be removed. The GOT is calculated even though it isn't used.
This commit is contained in:
parent
2123868a42
commit
a6b0181cd4
29
External/FEXCore/Source/Interface/Core/Core.cpp
vendored
29
External/FEXCore/Source/Interface/Core/Core.cpp
vendored
@ -856,22 +856,25 @@ namespace FEXCore::Context {
|
||||
Thread->OpDispatcher->_ExitFunction(Thread->OpDispatcher->_EntrypointOffset(Block.Entry - GuestRIP, GPRSize));
|
||||
}
|
||||
|
||||
// If we had a dispatch error then leave early
|
||||
if (HadDispatchError) {
|
||||
if (TotalInstructions == 0) {
|
||||
// Couldn't handle any instruction in op dispatcher
|
||||
Thread->OpDispatcher->ResetWorkingList();
|
||||
return { nullptr, nullptr, 0, 0, 0, 0 };
|
||||
}
|
||||
else {
|
||||
const uint8_t GPRSize = GetGPRSize();
|
||||
const bool NeedsBlockEnd = (HadDispatchError && TotalInstructions > 0) ||
|
||||
(Thread->OpDispatcher->NeedsBlockEnder() && i + 1 == InstsInBlock);
|
||||
|
||||
// We had some instructions. Early exit
|
||||
Thread->OpDispatcher->_ExitFunction(Thread->OpDispatcher->_EntrypointOffset(Block.Entry + BlockInstructionsLength - GuestRIP, GPRSize));
|
||||
break;
|
||||
}
|
||||
// If we had a dispatch error then leave early
|
||||
if (HadDispatchError && TotalInstructions == 0) {
|
||||
// Couldn't handle any instruction in op dispatcher
|
||||
Thread->OpDispatcher->ResetWorkingList();
|
||||
return { nullptr, nullptr, 0, 0, 0, 0 };
|
||||
}
|
||||
|
||||
if (NeedsBlockEnd) {
|
||||
const uint8_t GPRSize = GetGPRSize();
|
||||
|
||||
// We had some instructions. Early exit
|
||||
Thread->OpDispatcher->_ExitFunction(Thread->OpDispatcher->_EntrypointOffset(Block.Entry + BlockInstructionsLength - GuestRIP, GPRSize));
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
if (Thread->OpDispatcher->FinishOp(DecodedInfo->PC + DecodedInfo->InstSize, i + 1 == InstsInBlock)) {
|
||||
break;
|
||||
}
|
||||
|
@ -1127,6 +1127,35 @@ void Decoder::BranchTargetInMultiblockRange() {
|
||||
}
|
||||
}
|
||||
|
||||
bool Decoder::BranchTargetCanContinue(bool FinalInstruction) const {
|
||||
if (FinalInstruction) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t TargetRIP = 0;
|
||||
const uint8_t GPRSize = CTX->GetGPRSize();
|
||||
|
||||
if (DecodeInst->OP == 0xE8) { // Call - immediate target
|
||||
const uint64_t NextRIP = DecodeInst->PC + DecodeInst->InstSize;
|
||||
LOGMAN_THROW_A_FMT(DecodeInst->Src[0].IsLiteral(), "Had wrong operand type");
|
||||
TargetRIP = DecodeInst->PC + DecodeInst->InstSize + DecodeInst->Src[0].Data.Literal.Value;
|
||||
|
||||
if (GPRSize == 4) {
|
||||
// If we are running a 32bit guest then wrap around addresses that go above 32bit
|
||||
TargetRIP &= 0xFFFFFFFFU;
|
||||
}
|
||||
|
||||
if (TargetRIP == NextRIP) {
|
||||
// Optimize the case that the instruction is jumping just after itself.
|
||||
// This is a GOT calculation which we can optimize out.
|
||||
// Optimization occurs inside of the OpDispatcher implementation
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
const uint8_t *Decoder::AdjustAddrForSpecialRegion(uint8_t const* _InstStream, uint64_t EntryPoint, uint64_t RIP) {
|
||||
constexpr uint64_t VSyscall_Base = 0xFFFF'FFFF'FF60'0000ULL;
|
||||
constexpr uint64_t VSyscall_End = VSyscall_Base + 0x1000;
|
||||
@ -1251,23 +1280,21 @@ void Decoder::DecodeInstructionsAtEntry(uint8_t const* _InstStream, uint64_t PC,
|
||||
CanContinue = true;
|
||||
}
|
||||
|
||||
bool FinalInstruction = DecodedSize >= CTX->Config.MaxInstPerBlock ||
|
||||
DecodedSize >= DefaultDecodedBufferSize ||
|
||||
TotalInstructions >= CTX->Config.MaxInstPerBlock;
|
||||
|
||||
if (DecodeInst->TableInfo->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SETS_RIP) {
|
||||
// If we have multiblock enabled
|
||||
// If the branch target is within our multiblock range then we can keep going on
|
||||
// We don't want to short circuit this since we want to calculate our ranges still
|
||||
BranchTargetInMultiblockRange();
|
||||
|
||||
// Bypass branches if we can continue through them in some cases.
|
||||
CanContinue |= BranchTargetCanContinue(FinalInstruction);
|
||||
}
|
||||
|
||||
if (!CanContinue) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (DecodedSize >= CTX->Config.MaxInstPerBlock ||
|
||||
DecodedSize >= DefaultDecodedBufferSize) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (TotalInstructions >= CTX->Config.MaxInstPerBlock) {
|
||||
if (FinalInstruction || !CanContinue) {
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -58,6 +58,7 @@ private:
|
||||
bool DecodeInstruction(uint64_t PC);
|
||||
|
||||
void BranchTargetInMultiblockRange();
|
||||
bool BranchTargetCanContinue(bool FinalInstruction) const;
|
||||
|
||||
uint8_t ReadByte();
|
||||
uint8_t PeekByte(uint8_t Offset) const;
|
||||
|
@ -783,8 +783,17 @@ void OpDispatchBuilder::CALLOp(OpcodeArgs) {
|
||||
|
||||
_StoreMem(GPRClass, GPRSize, NewSP, ConstantPCReturn, GPRSize);
|
||||
|
||||
// Store the RIP
|
||||
_ExitFunction(NewRIP); // If we get here then leave the function now
|
||||
const uint64_t NextRIP = Op->PC + Op->InstSize;
|
||||
LOGMAN_THROW_A_FMT(Op->Src[0].IsLiteral(), "Had wrong operand type");
|
||||
const uint64_t TargetRIP = Op->PC + Op->InstSize + Op->Src[0].Data.Literal.Value;
|
||||
|
||||
if (NextRIP != TargetRIP) {
|
||||
// Store the RIP
|
||||
_ExitFunction(NewRIP); // If we get here then leave the function now
|
||||
}
|
||||
else {
|
||||
NeedsBlockEnd = true;
|
||||
}
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::CALLAbsoluteOp(OpcodeArgs) {
|
||||
|
@ -153,8 +153,9 @@ public:
|
||||
OpDispatchBuilder(FEXCore::Utils::IntrusivePooledAllocator &Allocator);
|
||||
|
||||
void ResetWorkingList();
|
||||
void ResetDecodeFailure() { DecodeFailure = false; }
|
||||
void ResetDecodeFailure() { NeedsBlockEnd = DecodeFailure = false; }
|
||||
bool HadDecodeFailure() const { return DecodeFailure; }
|
||||
bool NeedsBlockEnder() const { return NeedsBlockEnd; }
|
||||
|
||||
void BeginFunction(uint64_t RIP, std::vector<FEXCore::Frontend::Decoder::DecodedBlocks> const *Blocks);
|
||||
void Finalize();
|
||||
@ -659,6 +660,7 @@ public:
|
||||
bool HandledLock = false;
|
||||
private:
|
||||
bool DecodeFailure{false};
|
||||
bool NeedsBlockEnd{false};
|
||||
FEXCore::IR::IROp_IRHeader *Current_Header{};
|
||||
OrderedNode *Current_HeaderNode{};
|
||||
|
||||
|
20
unittests/32Bit_ASM/FEX_bugs/GOT_calculation.asm
Normal file
20
unittests/32Bit_ASM/FEX_bugs/GOT_calculation.asm
Normal file
@ -0,0 +1,20 @@
|
||||
%ifdef CONFIG
|
||||
{
|
||||
"RegData": {
|
||||
"RAX": "0x10011"
|
||||
},
|
||||
"Mode": "32BIT"
|
||||
}
|
||||
%endif
|
||||
|
||||
mov esp, 0xe0000010
|
||||
|
||||
; This is a common pattern in 32-bit PIE code.
|
||||
; 32-bit GOT calculation needs to do a call+pop to get the EIP.
|
||||
; LEA doesn't work because there are no EIP-relative ops like on x86-64.
|
||||
|
||||
call target
|
||||
target:
|
||||
pop eax
|
||||
|
||||
hlt
|
Loading…
x
Reference in New Issue
Block a user