OpcodeDispatcher: use fused sbcs calculations

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
Alyssa Rosenzweig 2024-02-26 15:24:14 -04:00
parent 15db72ef60
commit a750870abf
3 changed files with 12 additions and 39 deletions

View File

@ -437,10 +437,7 @@ void OpDispatchBuilder::SBBOp(OpcodeArgs) {
}
if (SetFlags) {
if (Size < 4) {
Result = _Bfe(IR::SizeToOpSize(std::max<uint8_t>(4u, Size)), Size * 8, 0, Result);
}
GenerateFlags_SBB(Op, Result, Before, Src, CF);
CalculateFlags_SBB(Size, Before, Src);
}
}

View File

@ -78,7 +78,6 @@ friend class FEXCore::IR::PassManager;
public:
enum class FlagsGenerationType : uint8_t {
TYPE_NONE,
TYPE_SBB,
TYPE_SUB,
TYPE_MUL,
TYPE_UMUL,
@ -1645,13 +1644,6 @@ private:
OrderedNode *Src2;
} TwoSource;
// SBB: two operand nodes in Src1/Src2 plus a third source in Src3.
// GenerateFlags_SBB stores the carry-flag node in Src3, and the TYPE_SBB
// case of CalculateDeferredFlags reads all three back out.
struct {
OrderedNode *Src1;
OrderedNode *Src2;
OrderedNode *Src3;
} ThreeSource;
// LSHLI, LSHRI, ASHRI, RORI, ROLI
struct {
OrderedNode *Src1;
@ -1746,7 +1738,7 @@ private:
void CalculateOF(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, bool Sub);
OrderedNode *CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Src1, OrderedNode *Src2);
void CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF);
OrderedNode *CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Src1, OrderedNode *Src2);
OrderedNode *CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF = true);
OrderedNode *CalculateFlags_ADD(uint8_t SrcSize, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF = true);
void CalculateFlags_MUL(uint8_t SrcSize, OrderedNode *Res, OrderedNode *High);
@ -1779,21 +1771,6 @@ private:
*
* Depending on the operation it may force a RFLAGs calculation before storing the new deferred state.
* @{ */
// Records an SBB as the pending deferred-flags operation. No flags are
// computed here: the stored state is consumed by the TYPE_SBB case of
// CalculateDeferredFlags, which forwards SrcSize, Res and the three sources
// to CalculateFlags_SBB.
//
//   Op   - decoded instruction; used only to obtain the source size via GetSrcSize.
//   Res  - node holding the result of the operation.
//   Src1 - first operand node.
//   Src2 - second operand node.
//   CF   - carry-flag node; presumably the carry-in consumed by the SBB
//          (NOTE(review): confirm against the caller).
void GenerateFlags_SBB(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) {
// Plain assignment: whatever deferred state was recorded before is replaced.
CurrentDeferredFlags = DeferredFlagData {
.Type = FlagsGenerationType::TYPE_SBB,
.SrcSize = GetSrcSize(Op),
.Res = Res,
.Sources = {
// SBB is the three-input case: both operands plus the carry node.
.ThreeSource = {
.Src1 = Src1,
.Src2 = Src2,
.Src3 = CF,
},
},
};
}
void GenerateFlags_SUB(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF = true) {
if (!UpdateCF) {
// If we aren't updating CF then we need to calculate flags. Invalidation mask would make this not required.

View File

@ -310,14 +310,6 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) {
}
switch (CurrentDeferredFlags.Type) {
case FlagsGenerationType::TYPE_SBB:
CalculateFlags_SBB(
CurrentDeferredFlags.SrcSize,
CurrentDeferredFlags.Res,
CurrentDeferredFlags.Sources.ThreeSource.Src1,
CurrentDeferredFlags.Sources.ThreeSource.Src2,
CurrentDeferredFlags.Sources.ThreeSource.Src3);
break;
case FlagsGenerationType::TYPE_SUB:
CalculateFlags_SUB(
CurrentDeferredFlags.SrcSize,
@ -505,24 +497,28 @@ OrderedNode *OpDispatchBuilder::CalculateFlags_ADC(uint8_t SrcSize, OrderedNode
return Res;
}
void OpDispatchBuilder::CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) {
OrderedNode *OpDispatchBuilder::CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Src1, OrderedNode *Src2) {
auto Zero = _Constant(0);
auto One = _Constant(1);
auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
CalculateAF(Src1, Src2);
CalculatePF(Res);
OrderedNode *Res;
if (SrcSize >= 4) {
// Rectify input carry
CarryInvert();
HandleNZCV_RMW();
_SbbNZCV(OpSize, Src1, Src2);
Res = _SbbWithFlags(OpSize, Src1, Src2);
// Rectify output carry
CarryInvert();
} else {
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);
Res = _Sub(OpSize, Src1, _Add(OpSize, Src2, CF));
Res = _Bfe(OpSize, SrcSize * 8, 0, Res);
// SF/ZF
SetNZ_ZeroCV(SrcSize, Res);
@ -538,6 +534,9 @@ void OpDispatchBuilder::CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, Or
// Signed
CalculateOF(SrcSize, Res, Src1, Src2, true);
}
CalculatePF(Res);
return Res;
}
OrderedNode *OpDispatchBuilder::CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF) {