mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-01-31 11:32:07 +00:00
IR,OpcodeDispatcher,JIT: fuse adcs flags
The usual tricks. This also requires introducing a bare adc op that adcs can be optimized to when its flags are dead, but we wanted that anyway! Also support a zero source, so we can calculate "foo + CF" in one instruction to optimize the "lock adc" cases. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
parent
0ef72bf118
commit
6994fc3a01
@ -143,6 +143,26 @@ DEF_OP(AdcNZCV) {
|
||||
adcs(EmitSize, ARMEmitter::Reg::zr, GetReg(Op->Src1.ID()), GetReg(Op->Src2.ID()));
|
||||
}
|
||||
|
||||
// Backend handler for the AdcWithFlags IR op: emits a single `adcs` whose
// destination is this node's register and whose operands are the op's two sources.
// Only 32-bit and 64-bit operation sizes are accepted.
DEF_OP(AdcWithFlags) {
  const auto OpSize = IROp->Size;
  auto Op = IROp->C<IR::IROp_AdcWithFlags>();

  LOGMAN_THROW_AA_FMT(OpSize == IR::i32Bit || OpSize == IR::i64Bit, "Unsupported {} size: {}", __func__, OpSize);

  // Anything that is not 64-bit is emitted as a 32-bit operation.
  const auto EmitSize = OpSize != IR::i64Bit ? ARMEmitter::Size::i32Bit : ARMEmitter::Size::i64Bit;

  const auto Dst = GetReg(Node);
  // The first source is fetched with GetZeroableReg instead of GetReg.
  // NOTE(review): presumably this lets an inlined zero constant map onto the
  // zero register -- confirm against GetZeroableReg.
  const auto Lhs = GetZeroableReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2.ID());

  adcs(EmitSize, Dst, Lhs, Rhs);
}
|
||||
|
||||
// Backend handler for the Adc IR op: emits a single `adc` whose destination is
// this node's register and whose operands are the op's two sources.
// Only 32-bit and 64-bit operation sizes are accepted.
DEF_OP(Adc) {
  const auto OpSize = IROp->Size;
  auto Op = IROp->C<IR::IROp_Adc>();

  LOGMAN_THROW_AA_FMT(OpSize == IR::i32Bit || OpSize == IR::i64Bit, "Unsupported {} size: {}", __func__, OpSize);

  // Anything that is not 64-bit is emitted as a 32-bit operation.
  const auto EmitSize = OpSize != IR::i64Bit ? ARMEmitter::Size::i32Bit : ARMEmitter::Size::i64Bit;

  const auto Dst = GetReg(Node);
  // The first source is fetched with GetZeroableReg instead of GetReg.
  // NOTE(review): presumably this lets an inlined zero constant map onto the
  // zero register -- confirm against GetZeroableReg.
  const auto Lhs = GetZeroableReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2.ID());

  adc(EmitSize, Dst, Lhs, Rhs);
}
|
||||
|
||||
DEF_OP(SbbNZCV) {
|
||||
auto Op = IROp->C<IR::IROp_SbbNZCV>();
|
||||
const auto OpSize = IROp->Size;
|
||||
|
@ -391,27 +391,22 @@ void OpDispatchBuilder::ADCOp(OpcodeArgs) {
|
||||
uint8_t Size = GetDstSize(Op);
|
||||
const auto OpSize = IR::SizeToOpSize(std::max<uint8_t>(4u, Size));
|
||||
|
||||
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);
|
||||
auto ALUOp = _Add(OpSize, Src, CF);
|
||||
|
||||
OrderedNode *Result{};
|
||||
OrderedNode *Before{};
|
||||
if (DestIsLockedMem(Op)) {
|
||||
auto ALUOp = _Adc(OpSize, _Constant(0), Src);
|
||||
|
||||
HandledLock = true;
|
||||
OrderedNode *DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
|
||||
DestMem = AppendSegmentOffset(DestMem, Op->Flags);
|
||||
Before = _AtomicFetchAdd(IR::SizeToOpSize(Size), ALUOp, DestMem);
|
||||
Result = _Add(OpSize, Before, ALUOp);
|
||||
}
|
||||
else {
|
||||
Before = LoadSource(GPRClass, Op, Op->Dest, Op->Flags);
|
||||
Result = _Add(OpSize, Before, ALUOp);
|
||||
StoreResult(GPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
if (Size < 4)
|
||||
Result = _Bfe(IR::SizeToOpSize(std::max<uint8_t>(4u, Size)), Size * 8, 0, Result);
|
||||
GenerateFlags_ADC(Op, Result, Before, Src, CF);
|
||||
OrderedNode *Result = CalculateFlags_ADC(Size, Before, Src);
|
||||
if (!DestIsLockedMem(Op))
|
||||
StoreResult(GPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
template<uint32_t SrcIndex, bool SetFlags>
|
||||
|
@ -78,7 +78,6 @@ friend class FEXCore::IR::PassManager;
|
||||
public:
|
||||
enum class FlagsGenerationType : uint8_t {
|
||||
TYPE_NONE,
|
||||
TYPE_ADC,
|
||||
TYPE_SBB,
|
||||
TYPE_SUB,
|
||||
TYPE_MUL,
|
||||
@ -1646,7 +1645,7 @@ private:
|
||||
OrderedNode *Src2;
|
||||
} TwoSource;
|
||||
|
||||
// ADC, SBB
|
||||
// SBB
|
||||
struct {
|
||||
OrderedNode *Src1;
|
||||
OrderedNode *Src2;
|
||||
@ -1746,7 +1745,7 @@ private:
|
||||
void CalculateAF(OrderedNode *Src1, OrderedNode *Src2);
|
||||
|
||||
void CalculateOF(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, bool Sub);
|
||||
void CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF);
|
||||
OrderedNode *CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Src1, OrderedNode *Src2);
|
||||
void CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF);
|
||||
OrderedNode *CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF = true);
|
||||
OrderedNode *CalculateFlags_ADD(uint8_t SrcSize, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF = true);
|
||||
@ -1780,21 +1779,6 @@ private:
|
||||
*
|
||||
* Depending on the operation it may force a RFLAGs calculation before storing the new deferred state.
|
||||
* @{ */
|
||||
// Records an ADC as the pending deferred flag calculation by overwriting
// CurrentDeferredFlags with a new DeferredFlagData of type TYPE_ADC.
// This function only stores its arguments; it performs no flag computation itself.
//   Op   - decoded instruction, used here only to obtain the source size via GetSrcSize(Op).
//   Res  - stored as the operation's result node.
//   Src1 - stored as the first of the three sources.
//   Src2 - stored as the second of the three sources.
//   CF   - stored in the third source slot (Src3).
void GenerateFlags_ADC(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) {
|
||||
CurrentDeferredFlags = DeferredFlagData {
|
||||
.Type = FlagsGenerationType::TYPE_ADC,
|
||||
.SrcSize = GetSrcSize(Op),
|
||||
.Res = Res,
|
||||
.Sources = {
|
||||
.ThreeSource = {
|
||||
.Src1 = Src1,
|
||||
|
||||
.Src2 = Src2,
|
||||
.Src3 = CF,
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
void GenerateFlags_SBB(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) {
|
||||
CurrentDeferredFlags = DeferredFlagData {
|
||||
.Type = FlagsGenerationType::TYPE_SBB,
|
||||
|
@ -310,14 +310,6 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) {
|
||||
}
|
||||
|
||||
switch (CurrentDeferredFlags.Type) {
|
||||
case FlagsGenerationType::TYPE_ADC:
|
||||
CalculateFlags_ADC(
|
||||
CurrentDeferredFlags.SrcSize,
|
||||
CurrentDeferredFlags.Res,
|
||||
CurrentDeferredFlags.Sources.ThreeSource.Src1,
|
||||
CurrentDeferredFlags.Sources.ThreeSource.Src2,
|
||||
CurrentDeferredFlags.Sources.ThreeSource.Src3);
|
||||
break;
|
||||
case FlagsGenerationType::TYPE_SBB:
|
||||
CalculateFlags_SBB(
|
||||
CurrentDeferredFlags.SrcSize,
|
||||
@ -477,18 +469,22 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) {
|
||||
NZCVDirty = false;
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) {
|
||||
OrderedNode *OpDispatchBuilder::CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Src1, OrderedNode *Src2) {
|
||||
auto Zero = _Constant(0);
|
||||
auto One = _Constant(1);
|
||||
auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
|
||||
OrderedNode *Res;
|
||||
|
||||
CalculateAF(Src1, Src2);
|
||||
CalculatePF(Res);
|
||||
|
||||
if (SrcSize >= 4) {
|
||||
HandleNZCV_RMW();
|
||||
_AdcNZCV(OpSize, Src1, Src2);
|
||||
Res = _AdcWithFlags(OpSize, Src1, Src2);
|
||||
} else {
|
||||
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);
|
||||
Res = _Add(OpSize, _Add(OpSize, Src1, Src2), CF);
|
||||
Res = _Bfe(OpSize, SrcSize * 8, 0, Res);
|
||||
|
||||
// SF/ZF
|
||||
SetNZ_ZeroCV(SrcSize, Res);
|
||||
|
||||
@ -504,6 +500,9 @@ void OpDispatchBuilder::CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Res, Or
|
||||
// Signed
|
||||
CalculateOF(SrcSize, Res, Src1, Src2, false);
|
||||
}
|
||||
|
||||
CalculatePF(Res);
|
||||
return Res;
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) {
|
||||
|
@ -951,6 +951,15 @@
|
||||
"Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
|
||||
]
|
||||
},
|
||||
"GPR = Adc OpSize:#Size, GPR:$Src1, GPR:$Src2": {
|
||||
"Desc": [ "Integer Add with carry",
|
||||
"Will truncate to 64 or 32bits"
|
||||
],
|
||||
"DestSize": "Size",
|
||||
"EmitValidation": [
|
||||
"Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
|
||||
]
|
||||
},
|
||||
"GPR = AddShift OpSize:#Size, GPR:$Src1, GPR:$Src2, ShiftType:$Shift{ShiftType::LSL}, u8:$ShiftAmount{0}": {
|
||||
"Desc": [ "Integer Add with shifted register",
|
||||
"Will truncate to 64 or 32bits"
|
||||
@ -994,6 +1003,14 @@
|
||||
"Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
|
||||
]
|
||||
},
|
||||
"GPR = AdcWithFlags OpSize:#Size, GPR:$Src1, GPR:$Src2": {
|
||||
"Desc": ["Adds and sets NZCV for the sum of two GPRs and carry-in given as NZCV"],
|
||||
"HasSideEffects": true,
|
||||
"DestSize": "Size",
|
||||
"EmitValidation": [
|
||||
"Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
|
||||
]
|
||||
},
|
||||
"AdcNZCV OpSize:#Size, GPR:$Src1, GPR:$Src2": {
|
||||
"Desc": ["Set NZCV for the sum of two GPRs and carry-in given as NZCV"],
|
||||
"HasSideEffects": true,
|
||||
|
@ -995,6 +995,22 @@ bool ConstProp::ConstantInlining(IREmitter *IREmit, const IRListView& CurrentIR)
|
||||
|
||||
break;
|
||||
}
|
||||
case OP_ADC:
|
||||
case OP_ADCWITHFLAGS:
|
||||
{
|
||||
auto Op = IROp->C<IR::IROp_Adc>();
|
||||
|
||||
uint64_t Constant1{};
|
||||
if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1)) {
|
||||
if (Constant1 == 0) {
|
||||
IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[0]));
|
||||
IREmit->ReplaceNodeArgument(CodeNode, 0, CreateInlineConstant(IREmit, 0));
|
||||
Changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case OP_CONDADDNZCV:
|
||||
{
|
||||
auto Op = IROp->C<IR::IROp_CondAddNZCV>();
|
||||
|
@ -142,6 +142,14 @@ DeadFlagCalculationEliminination::Classify(IROp_Header *IROp)
|
||||
.Replacement = OP_SUB,
|
||||
};
|
||||
|
||||
case OP_ADCWITHFLAGS:
|
||||
return {
|
||||
.Read = FLAG_C,
|
||||
.Write = FLAG_NZCV,
|
||||
.CanReplace = true,
|
||||
.Replacement = OP_ADC,
|
||||
};
|
||||
|
||||
case OP_ADDNZCV:
|
||||
case OP_SUBNZCV:
|
||||
case OP_TESTNZ:
|
||||
@ -170,6 +178,9 @@ DeadFlagCalculationEliminination::Classify(IROp_Header *IROp)
|
||||
case OP_LOADNZCV:
|
||||
return {.Read = FLAG_NZCV};
|
||||
|
||||
case OP_ADC:
|
||||
return {.Read = FLAG_C};
|
||||
|
||||
case OP_ADCNZCV:
|
||||
case OP_SBBNZCV:
|
||||
return {
|
||||
|
Loading…
x
Reference in New Issue
Block a user