mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-12-11 16:28:21 +00:00
Merge pull request #3956 from alyssarosenzweig/opt/pop-return
small optimizations for returns
This commit is contained in:
commit
933c65d805
@ -78,8 +78,12 @@ DEF_OP(ExitFunction) {
|
||||
// L1 Cache
|
||||
ldr(TMP1, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.L1Pointer));
|
||||
|
||||
and_(ARMEmitter::Size::i64Bit, TMP4, RipReg, LookupCache::L1_ENTRIES_MASK);
|
||||
add(TMP1, TMP1, TMP4, ARMEmitter::ShiftType::LSL, 4);
|
||||
// Calculate (tmp1 + ((ripreg & L1_ENTRIES_MASK) << 4)) for the address
|
||||
// arithmetic. ubfiz+add is marginally faster on Firestorm than
|
||||
// and+add(shift). Same performance on Cortex.
|
||||
static_assert(LookupCache::L1_ENTRIES_MASK == ((1u << 20) - 1));
|
||||
ubfiz(ARMEmitter::Size::i64Bit, TMP4, RipReg, 4, 20);
|
||||
add(TMP1, TMP1, TMP4);
|
||||
|
||||
// Note: sub+cbnz used over cmp+br to preserve flags.
|
||||
ldp<ARMEmitter::IndexType::OFFSET>(TMP2, TMP1, TMP1, 0);
|
||||
|
@ -162,20 +162,16 @@ void OpDispatchBuilder::RETOp(OpcodeArgs) {
|
||||
InvalidateDeferredFlags();
|
||||
}
|
||||
|
||||
auto Constant = _Constant(GPRSize);
|
||||
auto OldSP = LoadGPRRegister(X86State::REG_RSP);
|
||||
auto NewRIP = _LoadMem(GPRClass, GPRSize, OldSP, GPRSize);
|
||||
Ref SP = _RMWHandle(LoadGPRRegister(X86State::REG_RSP));
|
||||
Ref NewRIP = Pop(GPRSize, SP);
|
||||
|
||||
Ref NewSP;
|
||||
if (Op->OP == 0xC2) {
|
||||
auto Offset = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);
|
||||
NewSP = _Add(IR::SizeToOpSize(GPRSize), _Add(IR::SizeToOpSize(GPRSize), OldSP, Constant), Offset);
|
||||
} else {
|
||||
NewSP = _Add(IR::SizeToOpSize(GPRSize), OldSP, Constant);
|
||||
SP = _Add(IR::SizeToOpSize(GPRSize), SP, Offset);
|
||||
}
|
||||
|
||||
// Store the new stack pointer
|
||||
StoreGPRRegister(X86State::REG_RSP, NewSP);
|
||||
StoreGPRRegister(X86State::REG_RSP, SP);
|
||||
|
||||
// Store the new RIP
|
||||
ExitFunction(NewRIP);
|
||||
@ -201,36 +197,27 @@ void OpDispatchBuilder::IRETOp(OpcodeArgs) {
|
||||
|
||||
const uint8_t GPRSize = CTX->GetGPRSize();
|
||||
|
||||
auto Constant = _Constant(GPRSize);
|
||||
|
||||
auto SP = LoadGPRRegister(X86State::REG_RSP);
|
||||
Ref SP = _RMWHandle(LoadGPRRegister(X86State::REG_RSP));
|
||||
|
||||
// RIP (64/32/16 bits)
|
||||
auto NewRIP = _LoadMem(GPRClass, GPRSize, SP, GPRSize);
|
||||
SP = _Add(IR::SizeToOpSize(GPRSize), SP, Constant);
|
||||
auto NewRIP = Pop(GPRSize, SP);
|
||||
// CS (lower 16 used)
|
||||
auto NewSegmentCS = _LoadMem(GPRClass, GPRSize, SP, GPRSize);
|
||||
auto NewSegmentCS = Pop(GPRSize, SP);
|
||||
_StoreContext(2, GPRClass, NewSegmentCS, offsetof(FEXCore::Core::CPUState, cs_idx));
|
||||
UpdatePrefixFromSegment(NewSegmentCS, FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX);
|
||||
|
||||
SP = _Add(IR::SizeToOpSize(GPRSize), SP, Constant);
|
||||
// eflags (lower 16 used)
|
||||
auto eflags = _LoadMem(GPRClass, GPRSize, SP, GPRSize);
|
||||
SetPackedRFLAG(false, eflags);
|
||||
SP = _Add(IR::SizeToOpSize(GPRSize), SP, Constant);
|
||||
SetPackedRFLAG(false, Pop(GPRSize, SP));
|
||||
|
||||
if (CTX->Config.Is64BitMode) {
|
||||
// RSP and SS only happen in 64-bit mode or if this is a CPL mode jump!
|
||||
// FEX doesn't support a CPL mode switch, so don't need to worry about this on 32-bit
|
||||
StoreGPRRegister(X86State::REG_RSP, _LoadMem(GPRClass, GPRSize, SP, GPRSize));
|
||||
StoreGPRRegister(X86State::REG_RSP, Pop(GPRSize, SP));
|
||||
|
||||
SP = _Add(IR::SizeToOpSize(GPRSize), SP, Constant);
|
||||
// ss
|
||||
auto NewSegmentSS = _LoadMem(GPRClass, GPRSize, SP, GPRSize);
|
||||
auto NewSegmentSS = Pop(GPRSize, SP);
|
||||
_StoreContext(2, GPRClass, NewSegmentSS, offsetof(FEXCore::Core::CPUState, ss_idx));
|
||||
UpdatePrefixFromSegment(NewSegmentSS, FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX);
|
||||
|
||||
_Add(IR::SizeToOpSize(GPRSize), SP, Constant);
|
||||
} else {
|
||||
// Store the stack in 32-bit mode
|
||||
StoreGPRRegister(X86State::REG_RSP, SP);
|
||||
|
@ -3161,8 +3161,8 @@
|
||||
"bic w20, w22, w20",
|
||||
"strb w20, [x28, #1298]",
|
||||
"ldr x0, [x28, #2272]",
|
||||
"and x3, x21, #0xfffff",
|
||||
"add x0, x0, x3, lsl #4",
|
||||
"ubfiz x3, x21, #4, #20",
|
||||
"add x0, x0, x3",
|
||||
"ldp x1, x0, [x0]"
|
||||
]
|
||||
},
|
||||
@ -4187,8 +4187,8 @@
|
||||
"bic w20, w23, w20",
|
||||
"strb w20, [x28, #1298]",
|
||||
"ldr x0, [x28, #2272]",
|
||||
"and x3, x22, #0xfffff",
|
||||
"add x0, x0, x3, lsl #4",
|
||||
"ubfiz x3, x22, #4, #20",
|
||||
"add x0, x0, x3",
|
||||
"ldp x1, x0, [x0]"
|
||||
]
|
||||
},
|
||||
@ -6327,8 +6327,8 @@
|
||||
"add w21, w20, w21",
|
||||
"str w20, [x8, #-4]!",
|
||||
"ldr x0, [x28, #2272]",
|
||||
"and x3, x21, #0xfffff",
|
||||
"add x0, x0, x3, lsl #4",
|
||||
"ubfiz x3, x21, #4, #20",
|
||||
"add x0, x0, x3",
|
||||
"ldp x1, x0, [x0]"
|
||||
]
|
||||
},
|
||||
@ -6613,8 +6613,8 @@
|
||||
"add w21, w20, w21",
|
||||
"str w20, [x8, #-4]!",
|
||||
"ldr x0, [x28, #2272]",
|
||||
"and x3, x21, #0xfffff",
|
||||
"add x0, x0, x3, lsl #4",
|
||||
"ubfiz x3, x21, #4, #20",
|
||||
"add x0, x0, x3",
|
||||
"ldp x1, x0, [x0]"
|
||||
]
|
||||
},
|
||||
@ -6771,8 +6771,8 @@
|
||||
"add w21, w20, w21",
|
||||
"str w20, [x8, #-4]!",
|
||||
"ldr x0, [x28, #2272]",
|
||||
"and x3, x21, #0xfffff",
|
||||
"add x0, x0, x3, lsl #4",
|
||||
"ubfiz x3, x21, #4, #20",
|
||||
"add x0, x0, x3",
|
||||
"ldp x1, x0, [x0]"
|
||||
]
|
||||
}
|
||||
|
@ -56171,8 +56171,8 @@
|
||||
"bic w20, w23, w20",
|
||||
"strb w20, [x28, #1298]",
|
||||
"ldr x0, [x28, #2272]",
|
||||
"and x3, x21, #0xfffff",
|
||||
"add x0, x0, x3, lsl #4",
|
||||
"ubfiz x3, x21, #4, #20",
|
||||
"add x0, x0, x3",
|
||||
"ldp x1, x0, [x0]"
|
||||
]
|
||||
},
|
||||
@ -58291,8 +58291,8 @@
|
||||
"bic w20, w22, w20",
|
||||
"strb w20, [x28, #1298]",
|
||||
"ldr x0, [x28, #2272]",
|
||||
"and x3, x21, #0xfffff",
|
||||
"add x0, x0, x3, lsl #4",
|
||||
"ubfiz x3, x21, #4, #20",
|
||||
"add x0, x0, x3",
|
||||
"ldp x1, x0, [x0]"
|
||||
]
|
||||
},
|
||||
@ -58949,8 +58949,8 @@
|
||||
"bic w20, w22, w20",
|
||||
"strb w20, [x28, #1298]",
|
||||
"ldr x0, [x28, #2272]",
|
||||
"and x3, x21, #0xfffff",
|
||||
"add x0, x0, x3, lsl #4",
|
||||
"ubfiz x3, x21, #4, #20",
|
||||
"add x0, x0, x3",
|
||||
"ldp x1, x0, [x0]"
|
||||
]
|
||||
},
|
||||
@ -59329,8 +59329,8 @@
|
||||
"bic w20, w22, w20",
|
||||
"strb w20, [x28, #1298]",
|
||||
"ldr x0, [x28, #2272]",
|
||||
"and x3, x21, #0xfffff",
|
||||
"add x0, x0, x3, lsl #4",
|
||||
"ubfiz x3, x21, #4, #20",
|
||||
"add x0, x0, x3",
|
||||
"ldp x1, x0, [x0]"
|
||||
]
|
||||
},
|
||||
@ -74263,8 +74263,8 @@
|
||||
"orr w20, w23, w20",
|
||||
"strb w20, [x28, #1298]",
|
||||
"ldr x0, [x28, #2272]",
|
||||
"and x3, x21, #0xfffff",
|
||||
"add x0, x0, x3, lsl #4",
|
||||
"ubfiz x3, x21, #4, #20",
|
||||
"add x0, x0, x3",
|
||||
"ldp x1, x0, [x0]"
|
||||
]
|
||||
},
|
||||
@ -74926,8 +74926,8 @@
|
||||
"add w21, w20, w21",
|
||||
"str w20, [x8, #-4]!",
|
||||
"ldr x0, [x28, #2272]",
|
||||
"and x3, x21, #0xfffff",
|
||||
"add x0, x0, x3, lsl #4",
|
||||
"ubfiz x3, x21, #4, #20",
|
||||
"add x0, x0, x3",
|
||||
"ldp x1, x0, [x0]"
|
||||
]
|
||||
}
|
||||
|
@ -32172,8 +32172,8 @@
|
||||
"str w20, [x8, #-4]!",
|
||||
"cfinv",
|
||||
"ldr x0, [x28, #2272]",
|
||||
"and x3, x21, #0xfffff",
|
||||
"add x0, x0, x3, lsl #4",
|
||||
"ubfiz x3, x21, #4, #20",
|
||||
"add x0, x0, x3",
|
||||
"ldp x1, x0, [x0]"
|
||||
]
|
||||
},
|
||||
@ -65419,8 +65419,8 @@
|
||||
"str w20, [x8, #-4]!",
|
||||
"strb wzr, [x28, #1298]",
|
||||
"ldr x0, [x28, #2272]",
|
||||
"and x3, x21, #0xfffff",
|
||||
"add x0, x0, x3, lsl #4",
|
||||
"ubfiz x3, x21, #4, #20",
|
||||
"add x0, x0, x3",
|
||||
"ldp x1, x0, [x0]"
|
||||
]
|
||||
},
|
||||
@ -73416,8 +73416,8 @@
|
||||
"str w20, [x8, #-4]!",
|
||||
"strb wzr, [x28, #1298]",
|
||||
"ldr x0, [x28, #2272]",
|
||||
"and x3, x21, #0xfffff",
|
||||
"add x0, x0, x3, lsl #4",
|
||||
"ubfiz x3, x21, #4, #20",
|
||||
"add x0, x0, x3",
|
||||
"ldp x1, x0, [x0]"
|
||||
]
|
||||
},
|
||||
@ -97572,8 +97572,8 @@
|
||||
"bic w20, w22, w20",
|
||||
"strb w20, [x28, #1298]",
|
||||
"ldr x0, [x28, #2272]",
|
||||
"and x3, x21, #0xfffff",
|
||||
"add x0, x0, x3, lsl #4",
|
||||
"ubfiz x3, x21, #4, #20",
|
||||
"add x0, x0, x3",
|
||||
"ldp x1, x0, [x0]"
|
||||
]
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user