OpcodeDispatcher: Optimize PF in lahf

Use the raw popcount rather than the final PF and use some sneaky bit math to
come out 1 instruction ahead.

Closes #3117

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
Alyssa Rosenzweig 2023-09-24 20:19:32 -04:00
parent 1f02a6da34
commit 68d32ad70d

View File

@@ -160,7 +160,8 @@ OrderedNode *OpDispatchBuilder::GetPackedRFLAG(uint32_t FlagsMask) {
if ((GetNZ && (FlagOffset == FEXCore::X86State::RFLAG_SF_LOC ||
FlagOffset == FEXCore::X86State::RFLAG_ZF_LOC)) ||
FlagOffset == FEXCore::X86State::RFLAG_CF_LOC) {
FlagOffset == FEXCore::X86State::RFLAG_CF_LOC ||
FlagOffset == FEXCore::X86State::RFLAG_PF_LOC) {
// Already handled
continue;
}
@@ -168,9 +169,7 @@ OrderedNode *OpDispatchBuilder::GetPackedRFLAG(uint32_t FlagsMask) {
// Note that the Bfi only considers the bottom bit of the flag, the rest of
// the byte is allowed to be garbage.
OrderedNode *Flag;
if (FlagOffset == FEXCore::X86State::RFLAG_PF_LOC)
Flag = LoadPF();
else if (FlagOffset == FEXCore::X86State::RFLAG_AF_LOC)
if (FlagOffset == FEXCore::X86State::RFLAG_AF_LOC)
Flag = LoadAF();
else
Flag = GetRFLAG(FlagOffset);
@@ -178,6 +177,20 @@ OrderedNode *OpDispatchBuilder::GetPackedRFLAG(uint32_t FlagsMask) {
Original = _Orlshl(OpSize::i64Bit, Original, Flag, FlagOffset);
}
// Raw PF value needs to have its bottom bit masked out and inverted. The
// naive sequence is and/eor/orlshl. But we can do the inversion implicitly
// instead.
if (FlagsMask & (1 << FEXCore::X86State::RFLAG_PF_LOC)) {
// Set every bit except the bottommost.
auto OnesInvPF = _Or(OpSize::i64Bit, LoadPFRaw(), _Constant(~1ull));
// Rotate the bottom bit to the appropriate location for PF, so we get
// something like 111P1111. Then invert that to get 000p0000. Then OR that
// into the flags. This is 1 A64 instruction :-)
auto RightRotation = 64 - FEXCore::X86State::RFLAG_PF_LOC;
Original = _Ornror(OpSize::i64Bit, Original, OnesInvPF, RightRotation);
}
// OR in the SF/ZF flags at the end, allowing the lshr to fold with the OR
if (GetNZ) {
static_assert(FEXCore::X86State::RFLAG_SF_LOC == (FEXCore::X86State::RFLAG_ZF_LOC + 1));