mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-02-25 01:01:20 +00:00
OpcodeDispatcher: Optimize PF in lahf
Use the raw popcount rather than the final PF, and use some sneaky bit math to come out 1 instruction ahead.

Closes #3117

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
parent
1f02a6da34
commit
68d32ad70d
@ -160,7 +160,8 @@ OrderedNode *OpDispatchBuilder::GetPackedRFLAG(uint32_t FlagsMask) {
|
||||
|
||||
if ((GetNZ && (FlagOffset == FEXCore::X86State::RFLAG_SF_LOC ||
|
||||
FlagOffset == FEXCore::X86State::RFLAG_ZF_LOC)) ||
|
||||
FlagOffset == FEXCore::X86State::RFLAG_CF_LOC) {
|
||||
FlagOffset == FEXCore::X86State::RFLAG_CF_LOC ||
|
||||
FlagOffset == FEXCore::X86State::RFLAG_PF_LOC) {
|
||||
// Already handled
|
||||
continue;
|
||||
}
|
||||
@ -168,9 +169,7 @@ OrderedNode *OpDispatchBuilder::GetPackedRFLAG(uint32_t FlagsMask) {
|
||||
// Note that the Bfi only considers the bottom bit of the flag, the rest of
|
||||
// the byte is allowed to be garbage.
|
||||
OrderedNode *Flag;
|
||||
if (FlagOffset == FEXCore::X86State::RFLAG_PF_LOC)
|
||||
Flag = LoadPF();
|
||||
else if (FlagOffset == FEXCore::X86State::RFLAG_AF_LOC)
|
||||
if (FlagOffset == FEXCore::X86State::RFLAG_AF_LOC)
|
||||
Flag = LoadAF();
|
||||
else
|
||||
Flag = GetRFLAG(FlagOffset);
|
||||
@ -178,6 +177,20 @@ OrderedNode *OpDispatchBuilder::GetPackedRFLAG(uint32_t FlagsMask) {
|
||||
Original = _Orlshl(OpSize::i64Bit, Original, Flag, FlagOffset);
|
||||
}
|
||||
|
||||
// Raw PF value needs to have its bottom bit masked out and inverted. The
|
||||
// naive sequence is and/eor/orlshl. But we can do the inversion implicitly
|
||||
// instead.
|
||||
if (FlagsMask & (1 << FEXCore::X86State::RFLAG_PF_LOC)) {
|
||||
// Set every bit except the bottommost.
|
||||
auto OnesInvPF = _Or(OpSize::i64Bit, LoadPFRaw(), _Constant(~1ull));
|
||||
|
||||
// Rotate the bottom bit to the appropriate location for PF, so we get
|
||||
// something like 111P1111. Then invert that to get 000p0000. Then OR that
|
||||
// into the flags. This is 1 A64 instruction :-)
|
||||
auto RightRotation = 64 - FEXCore::X86State::RFLAG_PF_LOC;
|
||||
Original = _Ornror(OpSize::i64Bit, Original, OnesInvPF, RightRotation);
|
||||
}
|
||||
|
||||
// OR in the SF/ZF flags at the end, allowing the lshr to fold with the OR
|
||||
if (GetNZ) {
|
||||
static_assert(FEXCore::X86State::RFLAG_SF_LOC == (FEXCore::X86State::RFLAG_ZF_LOC + 1));
|
||||
|
Loading…
x
Reference in New Issue
Block a user