mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-02-25 01:01:20 +00:00
OpcodeDispatcher: Don't mask small add/sub carries
For the GPR result, the masking already happens as part of the bfi. So the only point of masking is for the flag calculation. But actually, every flag except carry will ignore the upper bits anyway. And the carry calculation actually WANTS the upper bit as a faster impl. Deletes a pile of code both in FEX and the output :-) ADC/SBC could probably get similar treatment later. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
parent
5eed24a242
commit
5facb21d30
@ -322,7 +322,6 @@ void OpDispatchBuilder::CallbackReturnOp(OpcodeArgs) {
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::SecondaryALUOp(OpcodeArgs) {
|
||||
bool RequiresMask = false;
|
||||
FEXCore::IR::IROps IROp;
|
||||
#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg))
|
||||
switch (Op->OP) {
|
||||
@ -330,7 +329,6 @@ void OpDispatchBuilder::SecondaryALUOp(OpcodeArgs) {
|
||||
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 0):
|
||||
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 0):
|
||||
IROp = FEXCore::IR::IROps::OP_ADD;
|
||||
RequiresMask = true;
|
||||
break;
|
||||
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 1):
|
||||
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 1):
|
||||
@ -346,7 +344,6 @@ void OpDispatchBuilder::SecondaryALUOp(OpcodeArgs) {
|
||||
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 5):
|
||||
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 5):
|
||||
IROp = FEXCore::IR::IROps::OP_SUB;
|
||||
RequiresMask = true;
|
||||
break;
|
||||
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 6):
|
||||
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 6):
|
||||
@ -411,11 +408,6 @@ void OpDispatchBuilder::SecondaryALUOp(OpcodeArgs) {
|
||||
StoreResult(GPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
// Store result masks, but we need to
|
||||
if (RequiresMask && Size < 4) {
|
||||
Result = _Bfe(IR::SizeToOpSize(std::max<uint8_t>(4u, Size)), Size * 8, 0, Result);
|
||||
}
|
||||
|
||||
// Flags set
|
||||
{
|
||||
switch (IROp) {
|
||||
@ -1452,10 +1444,6 @@ void OpDispatchBuilder::CMPOp(OpcodeArgs) {
|
||||
auto ALUOp = _Sub(Size == 8 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Src);
|
||||
|
||||
OrderedNode *Result = ALUOp;
|
||||
if (Size < 4) {
|
||||
Result = _Bfe(IR::SizeToOpSize(std::max<uint8_t>(4u, Size)), Size * 8, 0, ALUOp);
|
||||
}
|
||||
|
||||
GenerateFlags_SUB(Op, Result, Dest, Src);
|
||||
|
||||
flagsOp = SelectionFlag::CMP;
|
||||
@ -3427,10 +3415,6 @@ void OpDispatchBuilder::XADDOp(OpcodeArgs) {
|
||||
// Calculated value gets stored in dst (order is important if dst is same as src)
|
||||
StoreResult(GPRClass, Op, Result, -1);
|
||||
|
||||
if (Size < 32) {
|
||||
Result = _Bfe(OpSize::i32Bit, Size, 0, Result);
|
||||
}
|
||||
|
||||
GenerateFlags_ADD(Op, Result, Dest, Src);
|
||||
}
|
||||
else {
|
||||
@ -3440,10 +3424,6 @@ void OpDispatchBuilder::XADDOp(OpcodeArgs) {
|
||||
StoreResult(GPRClass, Op, Op->Src[0], Before, -1);
|
||||
Result = _Add(OpSize, Before, Src); // Seperate result just for flags
|
||||
|
||||
if (Size < 32) {
|
||||
Result = _Bfe(OpSize::i32Bit, Size, 0, Result);
|
||||
}
|
||||
|
||||
GenerateFlags_ADD(Op, Result, Before, Src);
|
||||
}
|
||||
}
|
||||
@ -3859,9 +3839,6 @@ void OpDispatchBuilder::INCOp(OpcodeArgs) {
|
||||
StoreResult(GPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
if (Size < 32) {
|
||||
Result = _Bfe(OpSize::i32Bit, Size, 0, Result);
|
||||
}
|
||||
GenerateFlags_ADD(Op, Result, Dest, OneConst, false);
|
||||
}
|
||||
|
||||
@ -3892,9 +3869,6 @@ void OpDispatchBuilder::DECOp(OpcodeArgs) {
|
||||
if (!IsLocked) {
|
||||
StoreResult(GPRClass, Op, Result, -1);
|
||||
}
|
||||
if (Size < 32) {
|
||||
Result = _Bfe(OpSize::i32Bit, Size, 0, Result);
|
||||
}
|
||||
|
||||
GenerateFlags_SUB(Op, Result, Dest, OneConst, false);
|
||||
}
|
||||
@ -4032,9 +4006,6 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
|
||||
auto Src2 = _LoadMemAutoTSO(GPRClass, Size, Dest_RSI, Size);
|
||||
|
||||
OrderedNode* Result = _Sub(Size == 8 ? OpSize::i64Bit : OpSize::i32Bit, Src2, Src1);
|
||||
if (Size < 4)
|
||||
Result = _Bfe(OpSize::i32Bit, Size * 8, 0, Result);
|
||||
|
||||
GenerateFlags_SUB(Op, Result, Src2, Src1);
|
||||
|
||||
auto DF = GetRFLAG(FEXCore::X86State::RFLAG_DF_LOC);
|
||||
@ -4094,9 +4065,6 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
|
||||
auto Src2 = _LoadMem(GPRClass, Size, Dest_RSI, Size);
|
||||
|
||||
OrderedNode* Result = _Sub(Size == 8 ? OpSize::i64Bit : OpSize::i32Bit, Src2, Src1);
|
||||
if (Size < 4)
|
||||
Result = _Bfe(OpSize::i32Bit, Size * 8, 0, Result);
|
||||
|
||||
GenerateFlags_SUB(Op, Result, Src2, Src1);
|
||||
|
||||
// Calculate flags early.
|
||||
@ -4259,8 +4227,6 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) {
|
||||
auto Src2 = _LoadMemAutoTSO(GPRClass, Size, Dest_RDI, Size);
|
||||
|
||||
OrderedNode* Result = _Sub(Size == 8 ? OpSize::i64Bit : OpSize::i32Bit, Src1, Src2);
|
||||
if (Size < 4)
|
||||
Result = _Bfe(OpSize::i32Bit, Size * 8, 0, Result);
|
||||
GenerateFlags_SUB(Op, Result, Src1, Src2);
|
||||
|
||||
auto SizeConst = _Constant(Size);
|
||||
@ -4322,9 +4288,6 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) {
|
||||
auto Src2 = _LoadMemAutoTSO(GPRClass, Size, Dest_RDI, Size);
|
||||
|
||||
OrderedNode* Result = _Sub(Size == 8 ? OpSize::i64Bit : OpSize::i32Bit, Src1, Src2);
|
||||
if (Size < 4)
|
||||
Result = _Bfe(OpSize::i32Bit, Size * 8, 0, Result);
|
||||
|
||||
GenerateFlags_SUB(Op, Result, Src1, Src2);
|
||||
|
||||
// Calculate flags early.
|
||||
@ -4435,9 +4398,6 @@ void OpDispatchBuilder::NEGOp(OpcodeArgs) {
|
||||
StoreResult(GPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
if (Size < 4)
|
||||
Result = _Bfe(OpSize::i32Bit, Size * 8, 0, Result);
|
||||
|
||||
GenerateFlags_SUB(Op, Result, ZeroConst, Dest);
|
||||
}
|
||||
|
||||
@ -4672,13 +4632,7 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) {
|
||||
StoreResult(GPRClass, Op, DestResult, -1);
|
||||
}
|
||||
|
||||
const auto Size = GetDstBitSize(Op);
|
||||
|
||||
OrderedNode *Result = _Sub(IR::SizeToOpSize(GPRSize), Src3Lower, CASResult);
|
||||
if (Size < 32) {
|
||||
Result = _Bfe(OpSize::i64Bit, Size, 0, Result);
|
||||
}
|
||||
|
||||
GenerateFlags_SUB(Op, Result, Src3Lower, CASResult);
|
||||
}
|
||||
else {
|
||||
@ -4721,10 +4675,6 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) {
|
||||
const auto Size = GetDstBitSize(Op);
|
||||
|
||||
OrderedNode *Result = _Sub(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Src3Lower, CASResult);
|
||||
if (Size < 32) {
|
||||
Result = _Bfe(OpSize::i32Bit, Size, 0, Result);
|
||||
}
|
||||
|
||||
GenerateFlags_SUB(Op, Result, Src3Lower, CASResult);
|
||||
}
|
||||
}
|
||||
@ -5476,7 +5426,7 @@ void OpDispatchBuilder::MOVGPRNTOp(OpcodeArgs) {
|
||||
StoreResult(GPRClass, Op, Src, 1, MemoryAccessType::ACCESS_STREAM);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp, bool RequiresMask) {
|
||||
void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp) {
|
||||
auto Size = GetDstSize(Op);
|
||||
const auto OpSize = Size == 8 ? OpSize::i64Bit : OpSize::i32Bit;
|
||||
|
||||
@ -5527,10 +5477,6 @@ void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCor
|
||||
StoreResult(GPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
if (RequiresMask && Size < 4) {
|
||||
Result = _Bfe(OpSize::i32Bit, Size * 8, 0, Result);
|
||||
}
|
||||
|
||||
// Flags set
|
||||
{
|
||||
switch (ALUIROp) {
|
||||
@ -5551,9 +5497,9 @@ void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCor
|
||||
}
|
||||
}
|
||||
|
||||
template<FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp, bool RequiresMask>
|
||||
template<FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp>
|
||||
void OpDispatchBuilder::ALUOp(OpcodeArgs) {
|
||||
ALUOpImpl(Op, ALUIROp, AtomicFetchOp, RequiresMask);
|
||||
ALUOpImpl(Op, ALUIROp, AtomicFetchOp);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::INTOp(OpcodeArgs) {
|
||||
@ -6342,19 +6288,19 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
|
||||
void InstallOpcodeHandlers(Context::OperatingMode Mode) {
|
||||
constexpr std::tuple<uint8_t, uint8_t, X86Tables::OpDispatchPtr> BaseOpTable[] = {
|
||||
// Instructions
|
||||
{0x00, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_ADD, FEXCore::IR::IROps::OP_ATOMICFETCHADD, true>},
|
||||
{0x00, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_ADD, FEXCore::IR::IROps::OP_ATOMICFETCHADD>},
|
||||
|
||||
{0x08, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_OR, FEXCore::IR::IROps::OP_ATOMICFETCHOR, false>},
|
||||
{0x08, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_OR, FEXCore::IR::IROps::OP_ATOMICFETCHOR>},
|
||||
|
||||
{0x10, 6, &OpDispatchBuilder::ADCOp<0>},
|
||||
|
||||
{0x18, 6, &OpDispatchBuilder::SBBOp<0, true>},
|
||||
|
||||
{0x20, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_AND, FEXCore::IR::IROps::OP_ATOMICFETCHAND, false>},
|
||||
{0x20, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_AND, FEXCore::IR::IROps::OP_ATOMICFETCHAND>},
|
||||
|
||||
{0x28, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_SUB, FEXCore::IR::IROps::OP_ATOMICFETCHSUB, true>},
|
||||
{0x28, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_SUB, FEXCore::IR::IROps::OP_ATOMICFETCHSUB>},
|
||||
|
||||
{0x30, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_XOR, FEXCore::IR::IROps::OP_ATOMICFETCHXOR, false>},
|
||||
{0x30, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_XOR, FEXCore::IR::IROps::OP_ATOMICFETCHXOR>},
|
||||
|
||||
{0x38, 6, &OpDispatchBuilder::CMPOp<0>},
|
||||
{0x50, 8, &OpDispatchBuilder::PUSHREGOp},
|
||||
|
@ -187,7 +187,7 @@ public:
|
||||
void MOVGPRNTOp(OpcodeArgs);
|
||||
void MOVVectorOp(OpcodeArgs);
|
||||
void MOVVectorNTOp(OpcodeArgs);
|
||||
template<FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp, bool RequiresMask>
|
||||
template<FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp>
|
||||
void ALUOp(OpcodeArgs);
|
||||
void INTOp(OpcodeArgs);
|
||||
void SyscallOp(OpcodeArgs);
|
||||
@ -881,7 +881,7 @@ private:
|
||||
// Used during new op bringup
|
||||
bool ShouldDump{false};
|
||||
|
||||
void ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp, bool RequiresMask);
|
||||
void ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp);
|
||||
|
||||
// Opcode helpers for generalizing behavior across VEX and non-VEX variants.
|
||||
|
||||
|
@ -556,8 +556,6 @@ void OpDispatchBuilder::CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, Or
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF) {
|
||||
auto Zero = _Constant(0);
|
||||
auto One = _Constant(1);
|
||||
auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
|
||||
|
||||
CalculateAF(OpSize, Res, Src1, Src2);
|
||||
@ -576,10 +574,9 @@ void OpDispatchBuilder::CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Res, Or
|
||||
|
||||
// CF
|
||||
if (UpdateCF) {
|
||||
auto SelectOp = _Select(FEXCore::IR::COND_ULT,
|
||||
Src1, Src2, One, Zero);
|
||||
|
||||
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(SelectOp);
|
||||
// Grab carry bit from unmasked output.
|
||||
auto Bfe = _Bfe(OpSize::i32Bit, 1, SrcSize * 8, Res);
|
||||
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(Bfe);
|
||||
}
|
||||
|
||||
CalculateOF(SrcSize, Res, Src1, Src2, true);
|
||||
@ -591,8 +588,6 @@ void OpDispatchBuilder::CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Res, Or
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::CalculateFlags_ADD(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF) {
|
||||
auto Zero = _Constant(0);
|
||||
auto One = _Constant(1);
|
||||
auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
|
||||
|
||||
CalculateAF(OpSize, Res, Src1, Src2);
|
||||
@ -610,9 +605,9 @@ void OpDispatchBuilder::CalculateFlags_ADD(uint8_t SrcSize, OrderedNode *Res, Or
|
||||
|
||||
// CF
|
||||
if (UpdateCF) {
|
||||
auto SelectOp = _Select(FEXCore::IR::COND_ULT, Res, Src2, One, Zero);
|
||||
|
||||
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(SelectOp);
|
||||
// Grab carry bit from unmasked output
|
||||
auto Bfe = _Bfe(OpSize::i32Bit, 1, SrcSize * 8, Res);
|
||||
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(Bfe);
|
||||
}
|
||||
|
||||
CalculateOF(SrcSize, Res, Src1, Src2, false);
|
||||
|
Loading…
x
Reference in New Issue
Block a user