OpcodeDispatcher: Fixes rotates with zero not zero extending 32-bit result

For all the 32-bit rotates (except for RORX) we were failing to zero
extend the 32-bit result to the destination register when the rotate was
masked to zero.

Ensure we do this.
This commit is contained in:
Ryan Houdek 2024-07-04 12:06:40 -07:00
parent d3399a261b
commit f6ec99bede
No known key found for this signature in database
2 changed files with 55 additions and 11 deletions

View File

@ -1695,7 +1695,9 @@ void OpDispatchBuilder::RotateOp(OpcodeArgs) {
}
};
Calculate_ShiftVariable(LoadShift(true), [this, LoadShift, Op]() {
Calculate_ShiftVariable(
Op, LoadShift(true),
[this, LoadShift, Op]() {
const uint32_t Size = GetSrcBitSize(Op);
const auto OpSize = Size == 64 ? OpSize::i64Bit : OpSize::i32Bit;
@ -1734,7 +1736,8 @@ void OpDispatchBuilder::RotateOp(OpcodeArgs) {
auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, Left ? Size - 1 : 1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, Left ? 0 : Size - 2, true);
}
});
},
GetSrcSize(Op) == OpSize::i32Bit ? std::make_optional(&OpDispatchBuilder::ZeroShiftResult) : std::nullopt);
}
void OpDispatchBuilder::ANDNBMIOp(OpcodeArgs) {
@ -2069,6 +2072,7 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) {
if (IsValueConstant(WrapNode(Src), &Const)) {
Const &= Mask;
if (!Const) {
ZeroShiftResult(Op);
return;
}
@ -2103,7 +2107,9 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) {
}
Ref SrcMasked = _And(OpSize, Src, _Constant(Size, Mask));
Calculate_ShiftVariable(SrcMasked, [this, Op, Size, OpSize]() {
Calculate_ShiftVariable(
Op, SrcMasked,
[this, Op, Size, OpSize]() {
// Rematerialize loads to avoid crossblock liveness
Ref Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});
Ref Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
@ -2139,7 +2145,8 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) {
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Xor, Size - 2, true);
StoreResult(GPRClass, Op, Res, -1);
});
},
GetSrcSize(Op) == OpSize::i32Bit ? std::make_optional(&OpDispatchBuilder::ZeroShiftResult) : std::nullopt);
}
void OpDispatchBuilder::RCRSmallerOp(OpcodeArgs) {
@ -2153,7 +2160,7 @@ void OpDispatchBuilder::RCRSmallerOp(OpcodeArgs) {
// CF only changes if we actually shifted. OF undefined if we didn't shift.
// The result is unchanged if we didn't shift. So branch over the whole thing.
Calculate_ShiftVariable(Src, [this, Op, Size]() {
Calculate_ShiftVariable(Op, Src, [this, Op, Size]() {
// Rematerialized to avoid crossblock liveness
Ref Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});
@ -2289,6 +2296,7 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) {
if (IsValueConstant(WrapNode(Src), &Const)) {
Const &= Mask;
if (!Const) {
ZeroShiftResult(Op);
return;
}
@ -2322,7 +2330,9 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) {
}
Ref SrcMasked = _And(OpSize, Src, _Constant(Size, Mask));
Calculate_ShiftVariable(SrcMasked, [this, Op, Size, OpSize]() {
Calculate_ShiftVariable(
Op, SrcMasked,
[this, Op, Size, OpSize]() {
// Rematerialized to avoid crossblock liveness
Ref Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});
@ -2355,7 +2365,8 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) {
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, Size - 1, true);
StoreResult(GPRClass, Op, Res, -1);
});
},
GetSrcSize(Op) == OpSize::i32Bit ? std::make_optional(&OpDispatchBuilder::ZeroShiftResult) : std::nullopt);
}
void OpDispatchBuilder::RCLSmallerOp(OpcodeArgs) {
@ -2369,7 +2380,7 @@ void OpDispatchBuilder::RCLSmallerOp(OpcodeArgs) {
// CF only changes if we actually shifted. OF undefined if we didn't shift.
// The result is unchanged if we didn't shift. So branch over the whole thing.
Calculate_ShiftVariable(Src, [this, Op, Size]() {
Calculate_ShiftVariable(Op, Src, [this, Op, Size]() {
// Rematerialized to avoid crossblock liveness
Ref Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});
Src = AndConst(OpSize::i32Bit, Src, 0x1F);

View File

@ -2100,13 +2100,28 @@ private:
return CurrentDeferredFlags.Type == FlagsGenerationType::TYPE_NONE;
}
void ZeroShiftResult(FEXCore::X86Tables::DecodedOp Op) {
// In the case of zero-rotate, we need to store the destination still to deal with 32-bit semantics.
const uint32_t Size = GetSrcSize(Op);
if (Size != OpSize::i32Bit) {
return;
}
auto Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags);
StoreResult(GPRClass, Op, Dest, -1);
}
using ZeroShiftFunctionPtr = void (OpDispatchBuilder::*)(FEXCore::X86Tables::DecodedOp Op);
template<typename F>
void Calculate_ShiftVariable(Ref Shift, F&& Calculate) {
void Calculate_ShiftVariable(FEXCore::X86Tables::DecodedOp Op, Ref Shift, F&& Calculate,
std::optional<ZeroShiftFunctionPtr> ZeroShiftResult = std::nullopt) {
// RCR can call this with constants, so handle that without branching.
uint64_t Const;
if (IsValueConstant(WrapNode(Shift), &Const)) {
if (Const) {
Calculate();
} else if (ZeroShiftResult) {
(this->*(*ZeroShiftResult))(Op);
}
return;
@ -2118,8 +2133,17 @@ private:
// If the shift is zero, do not touch the flags.
auto SetBlock = CreateNewCodeBlockAfter(GetCurrentBlock());
auto EndBlock = CreateNewCodeBlockAfter(SetBlock);
CondJump(Shift, Zero, EndBlock, SetBlock, {COND_EQ});
IRPair<IROp_CodeBlock> NextBlock = SetBlock;
IRPair<IROp_CodeBlock> ZeroShiftBlock;
if (ZeroShiftResult) {
ZeroShiftBlock = CreateNewCodeBlockAfter(NextBlock);
NextBlock = ZeroShiftBlock;
}
auto EndBlock = CreateNewCodeBlockAfter(NextBlock);
///< Jump to zeroshift block or end block depending on if it was provided.
IRPair<IROp_CodeBlock> TailHandling = ZeroShiftResult ? ZeroShiftBlock : EndBlock;
CondJump(Shift, Zero, TailHandling, SetBlock, {COND_EQ});
SetCurrentCodeBlock(SetBlock);
StartNewBlock();
@ -2128,6 +2152,15 @@ private:
Jump(EndBlock);
}
if (ZeroShiftResult) {
SetCurrentCodeBlock(ZeroShiftBlock);
StartNewBlock();
{
(this->*(*ZeroShiftResult))(Op);
Jump(EndBlock);
}
}
SetCurrentCodeBlock(EndBlock);
StartNewBlock();
PossiblySetNZCVBits |= OldSetNZCVBits;