IR: Removes implicit sized lshr

This commit is contained in:
Ryan Houdek 2023-08-28 18:16:38 -07:00
parent 8534d3dfbf
commit 65dc6f3e90
7 changed files with 40 additions and 49 deletions

View File

@ -1852,7 +1852,7 @@ void OpDispatchBuilder::SHROp(OpcodeArgs) {
Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, -1);
}
auto ALUOp = _Lshr(std::max<uint8_t>(4, GetSrcSize(Op)), Dest, Src);
auto ALUOp = _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4, GetSrcSize(Op))), Dest, Src);
StoreResult(GPRClass, Op, ALUOp, -1);
if constexpr (SHR1Bit) {
@ -1879,7 +1879,7 @@ void OpDispatchBuilder::SHRImmediateOp(OpcodeArgs) {
}
OrderedNode *Src = _Constant(Size, Shift);
auto ALUOp = _Lshr(Dest, Src);
auto ALUOp = _Lshr(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Src);
StoreResult(GPRClass, Op, ALUOp, -1);
GenerateFlags_ShiftRightImmediate(Op, ALUOp, Dest, Shift);
@ -1905,9 +1905,8 @@ void OpDispatchBuilder::SHLDOp(OpcodeArgs) {
auto ShiftRight = _Sub(_Constant(Size), Shift);
auto Tmp1 = _Lshl(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Shift);
Tmp1.first->Header.Size = 8;
auto Tmp2 = _Lshr(Src, ShiftRight);
auto Tmp1 = _Lshl(OpSize::i64Bit, Dest, Shift);
auto Tmp2 = _Lshr(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Src, ShiftRight);
OrderedNode *Res = _Or(Tmp1, Tmp2);
@ -1966,9 +1965,8 @@ void OpDispatchBuilder::SHLDImmediateOp(OpcodeArgs) {
OrderedNode *ShiftLeft = _Constant(Shift);
auto ShiftRight = _Constant(Size - Shift);
auto Tmp1 = _Lshl(OpSize::i32Bit, Dest, ShiftLeft);
Tmp1.first->Header.Size = 8;
auto Tmp2 = _Lshr(Src, ShiftRight);
auto Tmp1 = _Lshl(OpSize::i64Bit, Dest, ShiftLeft);
auto Tmp2 = _Lshr(OpSize::i32Bit, Src, ShiftRight);
Res = _Or(Tmp1, Tmp2);
}
@ -2007,9 +2005,8 @@ void OpDispatchBuilder::SHRDOp(OpcodeArgs) {
auto ShiftLeft = _Sub(_Constant(Size), Shift);
auto Tmp1 = _Lshr(Dest, Shift);
auto Tmp2 = _Lshl(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Src, ShiftLeft);
Tmp2.first->Header.Size = 8;
auto Tmp1 = _Lshr(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Shift);
auto Tmp2 = _Lshl(OpSize::i64Bit, Src, ShiftLeft);
OrderedNode *Res = _Or(Tmp1, Tmp2);
@ -2067,7 +2064,7 @@ void OpDispatchBuilder::SHRDImmediateOp(OpcodeArgs) {
OrderedNode *ShiftRight = _Constant(Shift);
auto ShiftLeft = _Constant(Size - Shift);
auto Tmp1 = _Lshr(Dest, ShiftRight);
auto Tmp1 = _Lshr(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, ShiftRight);
auto Tmp2 = _Lshl(OpSize::i64Bit, Src, ShiftLeft);
Res = _Or(Tmp1, Tmp2);
@ -2320,7 +2317,7 @@ void OpDispatchBuilder::BEXTRBMIOp(OpcodeArgs) {
// Shift the operand down to the starting bit
auto Start = _Bfe(8, 0, Src2);
auto Shifted = _Lshr(Src1, Start);
auto Shifted = _Lshr(IR::SizeToOpSize(Size), Src1, Start);
// Shifts larger than operand size need to be set to zero.
auto SanitizedShifted = _Select(IR::COND_ULE,
@ -2401,7 +2398,7 @@ void OpDispatchBuilder::BMI2Shift(OpcodeArgs) {
}
// SHRX
return _Lshr(Size, Src, Shift);
return _Lshr(IR::SizeToOpSize(Size), Src, Shift);
}();
StoreResult(GPRClass, Op, Result, -1);
@ -2602,7 +2599,7 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) {
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC);
// Res = Src >> Shift
OrderedNode *Res = _Lshr(Dest, Src);
OrderedNode *Res = _Lshr(OpSizeFromSrc(Op), Dest, Src);
// Res |= (Src << (Size - Shift + 1));
OrderedNode *SrcShl = _Sub(_Constant(Size, Size + 1), Src);
@ -2631,7 +2628,7 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) {
// CF only changes if we actually shifted
// Our new CF will be bit (Shift - 1) of the source
auto NewCF = _Bfe(1, 0, _Lshr(Dest, _Sub(Src, One)));
auto NewCF = _Bfe(1, 0, _Lshr(OpSizeFromSrc(Op), Dest, _Sub(Src, One)));
CompareResult = _Select(FEXCore::IR::COND_UGE,
Src, One,
NewCF, CF);
@ -2723,14 +2720,14 @@ void OpDispatchBuilder::RCRSmallerOp(OpcodeArgs) {
// Entire bitfield has been setup
// Just extract the 8 or 16bits we need
OrderedNode *Res = _Lshr(Tmp, Src);
OrderedNode *Res = _Lshr(OpSize::i64Bit, Tmp, Src);
StoreResult(GPRClass, Op, Res, -1);
// CF only changes if we actually shifted
// Our new CF will be bit (Shift - 1) of the source
auto One = _Constant(Size, 1);
auto NewCF = _Bfe(1, 0, _Lshr(Tmp, _Sub(Src, One)));
auto NewCF = _Bfe(1, 0, _Lshr(OpSize::i64Bit, Tmp, _Sub(Src, One)));
auto CompareResult = _Select(FEXCore::IR::COND_UGE,
Src, One,
NewCF, CF);
@ -2792,7 +2789,7 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) {
// Res |= (Src << (Size - Shift + 1));
OrderedNode *SrcShl = _Sub(_Constant(Size, Size + 1), Src);
auto TmpHigher = _Lshr(Dest, SrcShl);
auto TmpHigher = _Lshr(OpSizeFromSrc(Op), Dest, SrcShl);
auto One = _Constant(Size, 1);
auto Zero = _Constant(Size, 0);
@ -2818,7 +2815,7 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) {
{
// CF only changes if we actually shifted
// Our new CF will be bit (Shift - 1) of the source
auto NewCF = _Bfe(1, 0, _Lshr(Dest, _Sub(_Constant(Size, Size), Src)));
auto NewCF = _Bfe(1, 0, _Lshr(OpSizeFromSrc(Op), Dest, _Sub(_Constant(Size, Size), Src)));
CompareResult = _Select(FEXCore::IR::COND_UGE,
Src, One,
NewCF, CF);
@ -2932,7 +2929,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs) {
BitSelect = _And(Src, SizeMask);
}
Result = _Lshr(Dest, BitSelect);
Result = _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4u, GetOpSize(Dest))), Dest, BitSelect);
} else {
// Load the address to the memory location
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, -1, false);
@ -2952,7 +2949,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs) {
Result = _LoadMemAutoTSO(GPRClass, 1, MemoryLocation, 1);
// Now shift in to the correct bit location
Result = _Lshr(Result, BitSelect);
Result = _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4u, GetOpSize(Result))), Result, BitSelect);
}
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(_Bfe(1, 0, Result));
}
@ -2991,7 +2988,7 @@ void OpDispatchBuilder::BTROp(OpcodeArgs) {
BitSelect = _And(Src, SizeMask);
}
Result = _Lshr(Dest, BitSelect);
Result = _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4u, GetOpSize(Dest))), Dest, BitSelect);
OrderedNode *BitMask = _Lshl(OpSize::i64Bit, _Constant(1), BitSelect);
Dest = _Andn(Dest, BitMask);
@ -3022,12 +3019,12 @@ void OpDispatchBuilder::BTROp(OpcodeArgs) {
// We don't current support this IR op though
Result = _AtomicFetchAnd(OpSize::i8Bit, BitMask, MemoryLocation);
// Now shift in to the correct bit location
Result = _Lshr(Result, BitSelect);
Result = _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4u, GetOpSize(Result))), Result, BitSelect);
} else {
OrderedNode *Value = _LoadMemAutoTSO(GPRClass, 1, MemoryLocation, 1);
// Now shift in to the correct bit location
Result = _Lshr(Value, BitSelect);
Result = _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4u, GetOpSize(Value))), Value, BitSelect);
Value = _Andn(Value, BitMask);
_StoreMemAutoTSO(GPRClass, 1, MemoryLocation, Value, 1);
}
@ -3069,7 +3066,7 @@ void OpDispatchBuilder::BTSOp(OpcodeArgs) {
BitSelect = _And(Src, SizeMask);
}
Result = _Lshr(Dest, BitSelect);
Result = _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4u, GetOpSize(Dest))), Dest, BitSelect);
OrderedNode *BitMask = _Lshl(OpSize::i64Bit, _Constant(1), BitSelect);
Dest = _Or(Dest, BitMask);
@ -3096,12 +3093,12 @@ void OpDispatchBuilder::BTSOp(OpcodeArgs) {
HandledLock = true;
Result = _AtomicFetchOr(OpSize::i8Bit, BitMask, MemoryLocation);
// Now shift in to the correct bit location
Result = _Lshr(Result, BitSelect);
Result = _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4u, GetOpSize(Result))), Result, BitSelect);
} else {
OrderedNode *Value = _LoadMemAutoTSO(GPRClass, 1, MemoryLocation, 1);
// Now shift in to the correct bit location
Result = _Lshr(Value, BitSelect);
Result = _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4u, GetOpSize(Value))), Value, BitSelect);
Value = _Or(Value, BitMask);
_StoreMemAutoTSO(GPRClass, 1, MemoryLocation, Value, 1);
}
@ -3143,7 +3140,7 @@ void OpDispatchBuilder::BTCOp(OpcodeArgs) {
BitSelect = _And(Src, SizeMask);
}
Result = _Lshr(Dest, BitSelect);
Result = _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4u, GetOpSize(Dest))), Dest, BitSelect);
OrderedNode *BitMask = _Lshl(OpSize::i64Bit, _Constant(1), BitSelect);
Dest = _Xor(Dest, BitMask);
@ -3170,12 +3167,12 @@ void OpDispatchBuilder::BTCOp(OpcodeArgs) {
HandledLock = true;
Result = _AtomicFetchXor(OpSize::i8Bit, BitMask, MemoryLocation);
// Now shift in to the correct bit location
Result = _Lshr(Result, BitSelect);
Result = _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4u, GetOpSize(Result))), Result, BitSelect);
} else {
OrderedNode *Value = _LoadMemAutoTSO(GPRClass, 1, MemoryLocation, 1);
// Now shift in to the correct bit location
Result = _Lshr(Value, BitSelect);
Result = _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4u, GetOpSize(Value))), Value, BitSelect);
Value = _Xor(Value, BitMask);
_StoreMemAutoTSO(GPRClass, 1, MemoryLocation, Value, 1);
}
@ -3625,7 +3622,7 @@ void OpDispatchBuilder::AADOp(OpcodeArgs) {
InvalidateDeferredFlags();
auto AL = LoadGPRRegister(X86State::REG_RAX, 1);
auto AH = _Lshr(LoadGPRRegister(X86State::REG_RAX, 2), _Constant(8));
auto AH = _Lshr(OpSize::i32Bit, LoadGPRRegister(X86State::REG_RAX, 2), _Constant(8));
auto Imm8 = _Constant(Op->Src[0].Data.Literal.Value & 0xFF);
auto NewAL = _Add(AL, _Mul(OpSize::i64Bit, AH, Imm8));
auto Result = _And(NewAL, _Constant(0xFF));

View File

@ -151,7 +151,7 @@ void OpDispatchBuilder::SHA1RNDS4Op(OpcodeArgs) {
void OpDispatchBuilder::SHA256MSG1Op(OpcodeArgs) {
const auto Sigma0 = [this](OrderedNode* W) -> OrderedNode* {
return _Xor(_Xor(_Ror(OpSize::i32Bit, W, _Constant(32, 7)), _Ror(OpSize::i32Bit, W, _Constant(32, 18))), _Lshr(W, _Constant(32, 3)));
return _Xor(_Xor(_Ror(OpSize::i32Bit, W, _Constant(32, 7)), _Ror(OpSize::i32Bit, W, _Constant(32, 18))), _Lshr(OpSize::i32Bit, W, _Constant(32, 3)));
};
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
@ -178,7 +178,7 @@ void OpDispatchBuilder::SHA256MSG1Op(OpcodeArgs) {
void OpDispatchBuilder::SHA256MSG2Op(OpcodeArgs) {
const auto Sigma1 = [this](OrderedNode* W) -> OrderedNode* {
return _Xor(_Xor(_Ror(OpSize::i32Bit, W, _Constant(32, 17)), _Ror(OpSize::i32Bit, W, _Constant(32, 19))), _Lshr(W, _Constant(32, 10)));
return _Xor(_Xor(_Ror(OpSize::i32Bit, W, _Constant(32, 17)), _Ror(OpSize::i32Bit, W, _Constant(32, 19))), _Lshr(OpSize::i32Bit, W, _Constant(32, 10)));
};
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);

View File

@ -629,7 +629,7 @@ void OpDispatchBuilder::CalculateFlags_ShiftLeft(uint8_t SrcSize, OrderedNode *R
// Extract the last bit shifted in to CF
auto Size = _Constant(SrcSize * 8);
auto ShiftAmt = _Sub(Size, Src2);
auto LastBit = _Bfe(1, 0, _Lshr(Src1, ShiftAmt));
auto LastBit = _Bfe(1, 0, _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4u, SrcSize)), Src1, ShiftAmt));
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(LastBit);
}
@ -664,7 +664,7 @@ void OpDispatchBuilder::CalculateFlags_ShiftRight(uint8_t SrcSize, OrderedNode *
{
// Extract the last bit shifted in to CF
auto ShiftAmt = _Sub(Src2, One);
auto LastBit = _Bfe(1, 0, _Lshr(Src1, ShiftAmt));
auto LastBit = _Bfe(1, 0, _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4u, SrcSize)), Src1, ShiftAmt));
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(LastBit);
}
@ -700,7 +700,7 @@ void OpDispatchBuilder::CalculateFlags_SignShiftRight(uint8_t SrcSize, OrderedNo
{
// Extract the last bit shifted in to CF
auto ShiftAmt = _Sub(Src2, One);
auto LastBit = _Bfe(1, 0, _Lshr(Src1, ShiftAmt));
auto LastBit = _Bfe(1, 0, _Lshr(IR::SizeToOpSize(std::max<uint8_t>(4u, GetOpSize(Src1))), Src1, ShiftAmt));
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(LastBit);
}

View File

@ -784,7 +784,7 @@ void OpDispatchBuilder::MOVMSKOp(OpcodeArgs) {
// Inserting the full lower 32-bits offset 31 so the sign bit ends up at offset 63.
GPR = _Bfi(8, 32, 31, GPR, GPR);
// Shift right to only get the two sign bits we care about.
GPR = _Lshr(GPR, _Constant(62));
GPR = _Lshr(OpSize::i64Bit, GPR, _Constant(62));
StoreResult_WithOpSize(GPRClass, Op, Op->Dest, GPR, CTX->GetGPRSize(), -1);
}
else if (Size == 16 && ElementSize == 4) {

View File

@ -48,7 +48,7 @@ OrderedNode *OpDispatchBuilder::GetX87FTW(OrderedNode *Value) {
auto FTW = _LoadContext(2, GPRClass, offsetof(FEXCore::Core::CPUState, FTW));
OrderedNode *Mask = _Constant(0b11);
auto TopOffset = _Lshl(OpSize::i32Bit, Value, _Constant(1));
auto NewFTW = _Lshr(FTW, TopOffset);
auto NewFTW = _Lshr(OpSize::i32Bit, FTW, TopOffset);
return _And(NewFTW, Mask);
}

View File

@ -72,7 +72,7 @@ void OpDispatchBuilder::X87LDENVF64(OpcodeArgs) {
OrderedNode *roundingMode = NewFCW;
auto roundShift = _Constant(10);
auto roundMask = _Constant(3);
roundingMode = _Lshr(roundingMode, roundShift);
roundingMode = _Lshr(OpSize::i32Bit, roundingMode, roundShift);
roundingMode = _And(roundingMode, roundMask);
_SetRoundingMode(roundingMode);
_F80LoadFCW(NewFCW);
@ -113,7 +113,7 @@ void OpDispatchBuilder::X87FLDCWF64(OpcodeArgs) {
OrderedNode *roundingMode = NewFCW;
auto shift = _Constant(10);
auto mask = _Constant(3);
roundingMode = _Lshr(roundingMode, shift);
roundingMode = _Lshr(OpSize::i32Bit, roundingMode, shift);
roundingMode = _And(roundingMode, mask);
_SetRoundingMode(roundingMode);
_StoreContext(2, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW));
@ -664,7 +664,7 @@ void OpDispatchBuilder::FXTRACTF64(OpcodeArgs) {
auto a = _LoadContextIndexed(orig_top, 8, MMBaseOffset(), 16, FPRClass);
auto gpr = _VExtractToGPR(8, 8, a, 0);
OrderedNode* exp = _And(gpr, _Constant(0x7ff0000000000000LL));
exp = _Lshr(exp, _Constant(52));
exp = _Lshr(OpSize::i64Bit, exp, _Constant(52));
exp = _Sub(exp, _Constant(1023));
exp = _Float_FromGPR_S(8, 8, exp);
OrderedNode* sig = _And(gpr, _Constant(0x800fffffffffffffLL));
@ -1045,7 +1045,7 @@ void OpDispatchBuilder::X87FRSTORF64(OpcodeArgs) {
OrderedNode *roundingMode = NewFCW;
auto roundShift = _Constant(10);
auto roundMask = _Constant(3);
roundingMode = _Lshr(roundingMode, roundShift);
roundingMode = _Lshr(OpSize::i32Bit, roundingMode, roundShift);
roundingMode = _And(roundingMode, roundMask);
_SetRoundingMode(roundingMode);
_F80LoadFCW(NewFCW);

View File

@ -87,9 +87,6 @@ friend class FEXCore::IR::PassManager;
// Convenience overload of _StoreMemTSO that takes the address before the value.
// Forwards to the longer _StoreMemTSO overload, swapping the operands into
// (Value, Addr) order and filling in the remaining arguments with Invalid(),
// the caller's Align (default 1), MEM_OFFSET_SXTX and 1.
// NOTE(review): the trailing arguments look like "no offset node, offset type,
// offset scale" - confirm against the full overload's signature.
IRPair<IROp_StoreMemTSO> _StoreMemTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, OrderedNode *Addr, OrderedNode *Value, uint8_t Align = 1) {
return _StoreMemTSO(Class, Size, Value, Addr, Invalid(), Align, MEM_OFFSET_SXTX, 1);
}
// Implicitly-sized logical shift right: the operation size is not passed by the
// caller but derived from Src1 via GetOpSize(), raised to a minimum of 4, and
// then handed to the sized _Lshr overload together with both operands.
// NOTE(review): the size appears to be in bytes (4 => 32-bit) - confirm.
IRPair<IROp_Lshr> _Lshr(OrderedNode *Src1, OrderedNode *Src2) {
return _Lshr(std::max<uint8_t>(4, GetOpSize(Src1)), Src1, Src2);
}
// Returns InvalidNode, the node passed where an operand slot is intentionally
// left unused (see its use as an argument in the _StoreMemTSO wrapper).
OrderedNode *Invalid() {
return InvalidNode;
}
@ -114,9 +111,6 @@ friend class FEXCore::IR::PassManager;
// Implicitly-sized _Andn: picks the operation size as the larger of the two
// operands' GetOpSize() values, raised to a minimum of 4, casts it to OpSize
// and forwards to the explicitly-sized _Andn overload.
// NOTE(review): the size appears to be in bytes (4 => 32-bit) - confirm.
IRPair<IROp_Andn> _Andn(OrderedNode *_Src1, OrderedNode *_Src2) {
return _Andn(static_cast<OpSize>(std::max<uint8_t>(4, std::max(GetOpSize(_Src1), GetOpSize(_Src2)))), _Src1, _Src2);
}
// Logical shift right taking the size as a raw uint8_t. The value is converted
// to OpSize with a plain static_cast (no range or validity check here) and
// forwarded to the OpSize-typed _Lshr overload.
IRPair<IROp_Lshr> _Lshr(uint8_t Size, OrderedNode *_Src1, OrderedNode *_Src2) {
return _Lshr(static_cast<OpSize>(Size), _Src1, _Src2);
}
// _Bfi overload taking the destination size as a raw uint8_t. DestSize is
// converted to OpSize with a plain static_cast and forwarded, along with
// _Width, _lsb, _Dest and _Src unchanged, to the OpSize-typed _Bfi overload.
// NOTE(review): presumably a bitfield insert of _Width bits of _Src into _Dest
// at bit _lsb - confirm against the IR op definition.
IRPair<IROp_Bfi> _Bfi(uint8_t DestSize, uint8_t _Width, uint8_t _lsb, OrderedNode *_Dest, OrderedNode *_Src) {
return _Bfi(static_cast<OpSize>(DestSize), _Width, _lsb, _Dest, _Src);
}