OpcodeDispatcher: calculate PF with integer ops

based on clang's __builtin_parity

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
2025-02-01 20:16:20 +00:00 · 2024-04-01 13:58:07 -04:00 · 2024-04-01 13:58:07 -04:00 · 3b052e826f
commit 3b052e826f
parent 65ec191dc1
1 changed file with 5 additions and 6 deletions
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp
@@ -242,13 +242,12 @@ OrderedNode *OpDispatchBuilder::LoadPFRaw() {
  // parity calculated.
  auto Result = GetRFLAG(FEXCore::X86State::RFLAG_PF_RAW_LOC);

-  // Cast the input to a 32-bit FPR. Logically we only need 8-bit, but that would
-  // generate unwanted an ubfx instruction. VPopcount will ignore the upper bits anyway.
-  auto InputFPR = _VCastFromGPR(4, 4, Result);
+  // Cascade to calculate parity of bottom 8-bits to bottom bit.
+  Result = _XorShift(OpSize::i32Bit, Result, Result, ShiftType::LSR, 4);
+  Result = _XorShift(OpSize::i32Bit, Result, Result, ShiftType::LSR, 2);
+  Result = _XorShift(OpSize::i32Bit, Result, Result, ShiftType::LSR, 1);

-  // Calculate the popcount.
-  auto Count = _VPopcount(1, 1, InputFPR);
-  return _VExtractToGPR(8, 1, Count, 0);
+  return Result;
 }

 OrderedNode *OpDispatchBuilder::LoadAF() {