diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 88e038130..a040f2d52 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -1695,7 +1695,9 @@ void OpDispatchBuilder::RotateOp(OpcodeArgs) { } }; - Calculate_ShiftVariable(LoadShift(true), [this, LoadShift, Op]() { + Calculate_ShiftVariable( + Op, LoadShift(true), + [this, LoadShift, Op]() { const uint32_t Size = GetSrcBitSize(Op); const auto OpSize = Size == 64 ? OpSize::i64Bit : OpSize::i32Bit; @@ -1734,7 +1736,8 @@ void OpDispatchBuilder::RotateOp(OpcodeArgs) { auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, Left ? Size - 1 : 1); SetRFLAG(NewOF, Left ? 0 : Size - 2, true); } - }); + }, + GetSrcSize(Op) == OpSize::i32Bit ? std::make_optional(&OpDispatchBuilder::ZeroShiftResult) : std::nullopt); } void OpDispatchBuilder::ANDNBMIOp(OpcodeArgs) { @@ -2069,6 +2072,7 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) { if (IsValueConstant(WrapNode(Src), &Const)) { Const &= Mask; if (!Const) { + ZeroShiftResult(Op); return; } @@ -2103,7 +2107,9 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) { } Ref SrcMasked = _And(OpSize, Src, _Constant(Size, Mask)); - Calculate_ShiftVariable(SrcMasked, [this, Op, Size, OpSize]() { + Calculate_ShiftVariable( + Op, SrcMasked, + [this, Op, Size, OpSize]() { // Rematerialize loads to avoid crossblock liveness Ref Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); Ref Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); @@ -2139,7 +2145,8 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) { SetRFLAG(Xor, Size - 2, true); StoreResult(GPRClass, Op, Res, -1); - }); + }, + GetSrcSize(Op) == OpSize::i32Bit ? std::make_optional(&OpDispatchBuilder::ZeroShiftResult) : std::nullopt); } void OpDispatchBuilder::RCRSmallerOp(OpcodeArgs) { @@ -2153,7 +2160,7 @@ void OpDispatchBuilder::RCRSmallerOp(OpcodeArgs) { // CF only changes if we actually shifted. OF undefined if we didn't shift. // The result is unchanged if we didn't shift. So branch over the whole thing. - Calculate_ShiftVariable(Src, [this, Op, Size]() { + Calculate_ShiftVariable(Op, Src, [this, Op, Size]() { // Rematerialized to avoid crossblock liveness Ref Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); @@ -2289,6 +2296,7 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) { if (IsValueConstant(WrapNode(Src), &Const)) { Const &= Mask; if (!Const) { + ZeroShiftResult(Op); return; } @@ -2322,7 +2330,9 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) { } Ref SrcMasked = _And(OpSize, Src, _Constant(Size, Mask)); - Calculate_ShiftVariable(SrcMasked, [this, Op, Size, OpSize]() { + Calculate_ShiftVariable( + Op, SrcMasked, + [this, Op, Size, OpSize]() { // Rematerialized to avoid crossblock liveness Ref Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); @@ -2355,7 +2365,8 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) { SetRFLAG(NewOF, Size - 1, true); StoreResult(GPRClass, Op, Res, -1); - }); + }, + GetSrcSize(Op) == OpSize::i32Bit ? std::make_optional(&OpDispatchBuilder::ZeroShiftResult) : std::nullopt); } void OpDispatchBuilder::RCLSmallerOp(OpcodeArgs) { @@ -2369,7 +2380,7 @@ void OpDispatchBuilder::RCLSmallerOp(OpcodeArgs) { // CF only changes if we actually shifted. OF undefined if we didn't shift. // The result is unchanged if we didn't shift. So branch over the whole thing. - Calculate_ShiftVariable(Src, [this, Op, Size]() { + Calculate_ShiftVariable(Op, Src, [this, Op, Size]() { // Rematerialized to avoid crossblock liveness Ref Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); Src = AndConst(OpSize::i32Bit, Src, 0x1F); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index e1cdbd6f7..a47602b24 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -2100,13 +2100,28 @@ private: return CurrentDeferredFlags.Type == FlagsGenerationType::TYPE_NONE; } + void ZeroShiftResult(FEXCore::X86Tables::DecodedOp Op) { + // In the case of zero-rotate, we need to store the destination still to deal with 32-bit semantics. + const uint32_t Size = GetSrcSize(Op); + if (Size != OpSize::i32Bit) { + return; + } + auto Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + StoreResult(GPRClass, Op, Dest, -1); + } + + using ZeroShiftFunctionPtr = void (OpDispatchBuilder::*)(FEXCore::X86Tables::DecodedOp Op); + template - void Calculate_ShiftVariable(Ref Shift, F&& Calculate) { + void Calculate_ShiftVariable(FEXCore::X86Tables::DecodedOp Op, Ref Shift, F&& Calculate, + std::optional ZeroShiftResult = std::nullopt) { // RCR can call this with constants, so handle that without branching. uint64_t Const; if (IsValueConstant(WrapNode(Shift), &Const)) { if (Const) { Calculate(); + } else if (ZeroShiftResult) { + (this->*(*ZeroShiftResult))(Op); } return; @@ -2118,8 +2133,17 @@ private: // If the shift is zero, do not touch the flags. auto SetBlock = CreateNewCodeBlockAfter(GetCurrentBlock()); - auto EndBlock = CreateNewCodeBlockAfter(SetBlock); - CondJump(Shift, Zero, EndBlock, SetBlock, {COND_EQ}); + IRPair NextBlock = SetBlock; + IRPair ZeroShiftBlock; + if (ZeroShiftResult) { + ZeroShiftBlock = CreateNewCodeBlockAfter(NextBlock); + NextBlock = ZeroShiftBlock; + } + auto EndBlock = CreateNewCodeBlockAfter(NextBlock); + + ///< Jump to zeroshift block or end block depending on if it was provided. + IRPair TailHandling = ZeroShiftResult ? ZeroShiftBlock : EndBlock; + CondJump(Shift, Zero, TailHandling, SetBlock, {COND_EQ}); SetCurrentCodeBlock(SetBlock); StartNewBlock(); @@ -2128,6 +2152,15 @@ private: Jump(EndBlock); } + if (ZeroShiftResult) { + SetCurrentCodeBlock(ZeroShiftBlock); + StartNewBlock(); + { + (this->*(*ZeroShiftResult))(Op); + Jump(EndBlock); + } + } + SetCurrentCodeBlock(EndBlock); StartNewBlock(); PossiblySetNZCVBits |= OldSetNZCVBits;