diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp index 37152c9b9..cb73a18ce 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp @@ -143,6 +143,26 @@ DEF_OP(AdcNZCV) { adcs(EmitSize, ARMEmitter::Reg::zr, GetReg(Op->Src1.ID()), GetReg(Op->Src2.ID())); } +DEF_OP(AdcWithFlags) { + auto Op = IROp->C<IR::IROp_AdcWithFlags>(); + const auto OpSize = IROp->Size; + + LOGMAN_THROW_AA_FMT(OpSize == IR::i32Bit || OpSize == IR::i64Bit, "Unsupported {} size: {}", __func__, OpSize); + const auto EmitSize = OpSize == IR::i64Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; + + adcs(EmitSize, GetReg(Node), GetZeroableReg(Op->Src1), GetReg(Op->Src2.ID())); +} + +DEF_OP(Adc) { + auto Op = IROp->C<IR::IROp_Adc>(); + const auto OpSize = IROp->Size; + + LOGMAN_THROW_AA_FMT(OpSize == IR::i32Bit || OpSize == IR::i64Bit, "Unsupported {} size: {}", __func__, OpSize); + const auto EmitSize = OpSize == IR::i64Bit ? 
ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; + + adc(EmitSize, GetReg(Node), GetZeroableReg(Op->Src1), GetReg(Op->Src2.ID())); +} + DEF_OP(SbbNZCV) { auto Op = IROp->C<IR::IROp_SbbNZCV>(); const auto OpSize = IROp->Size; diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 59d9e0a85..61d214a62 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -391,27 +391,22 @@ void OpDispatchBuilder::ADCOp(OpcodeArgs) { uint8_t Size = GetDstSize(Op); const auto OpSize = IR::SizeToOpSize(std::max<uint8_t>(4u, Size)); - auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC); - auto ALUOp = _Add(OpSize, Src, CF); - - OrderedNode *Result{}; OrderedNode *Before{}; if (DestIsLockedMem(Op)) { + auto ALUOp = _Adc(OpSize, _Constant(0), Src); + HandledLock = true; OrderedNode *DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); DestMem = AppendSegmentOffset(DestMem, Op->Flags); Before = _AtomicFetchAdd(IR::SizeToOpSize(Size), ALUOp, DestMem); - Result = _Add(OpSize, Before, ALUOp); } else { Before = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); - Result = _Add(OpSize, Before, ALUOp); - StoreResult(GPRClass, Op, Result, -1); } - if (Size < 4) - Result = _Bfe(IR::SizeToOpSize(std::max<uint8_t>(4u, Size)), Size * 8, 0, Result); - GenerateFlags_ADC(Op, Result, Before, Src, CF); + OrderedNode *Result = CalculateFlags_ADC(Size, Before, Src); + if (!DestIsLockedMem(Op)) + StoreResult(GPRClass, Op, Result, -1); } template<uint32_t SrcIndex, bool SetFlags> diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 357585af0..f721926a7 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -78,7 +78,6 @@ friend class FEXCore::IR::PassManager; public: enum class FlagsGenerationType : uint8_t { 
TYPE_NONE, - TYPE_ADC, TYPE_SBB, TYPE_SUB, TYPE_MUL, @@ -1646,7 +1645,7 @@ private: OrderedNode *Src2; } TwoSource; - // ADC, SBB + // SBB struct { OrderedNode *Src1; OrderedNode *Src2; @@ -1746,7 +1745,7 @@ private: void CalculateAF(OrderedNode *Src1, OrderedNode *Src2); void CalculateOF(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, bool Sub); - void CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF); + OrderedNode *CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Src1, OrderedNode *Src2); void CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF); OrderedNode *CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF = true); OrderedNode *CalculateFlags_ADD(uint8_t SrcSize, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF = true); @@ -1780,21 +1779,6 @@ private: * * Depending on the operation it may force a RFLAGs calculation before storing the new deferred state. 
* @{ */ - void GenerateFlags_ADC(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) { - CurrentDeferredFlags = DeferredFlagData { - .Type = FlagsGenerationType::TYPE_ADC, - .SrcSize = GetSrcSize(Op), - .Res = Res, - .Sources = { - .ThreeSource = { - .Src1 = Src1, - .Src2 = Src2, - .Src3 = CF, - }, - }, - }; - } - void GenerateFlags_SBB(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) { CurrentDeferredFlags = DeferredFlagData { .Type = FlagsGenerationType::TYPE_SBB, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp index 7a5286d6c..2c81086aa 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp @@ -310,14 +310,6 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) { } switch (CurrentDeferredFlags.Type) { - case FlagsGenerationType::TYPE_ADC: - CalculateFlags_ADC( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.ThreeSource.Src1, - CurrentDeferredFlags.Sources.ThreeSource.Src2, - CurrentDeferredFlags.Sources.ThreeSource.Src3); - break; case FlagsGenerationType::TYPE_SBB: CalculateFlags_SBB( CurrentDeferredFlags.SrcSize, @@ -477,18 +469,22 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) { NZCVDirty = false; } -void OpDispatchBuilder::CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) { +OrderedNode *OpDispatchBuilder::CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Src1, OrderedNode *Src2) { auto Zero = _Constant(0); auto One = _Constant(1); auto OpSize = SrcSize == 8 ? 
OpSize::i64Bit : OpSize::i32Bit; + OrderedNode *Res; CalculateAF(Src1, Src2); - CalculatePF(Res); if (SrcSize >= 4) { HandleNZCV_RMW(); - _AdcNZCV(OpSize, Src1, Src2); + Res = _AdcWithFlags(OpSize, Src1, Src2); } else { + auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC); + Res = _Add(OpSize, _Add(OpSize, Src1, Src2), CF); + Res = _Bfe(OpSize, SrcSize * 8, 0, Res); + // SF/ZF SetNZ_ZeroCV(SrcSize, Res); @@ -504,6 +500,9 @@ void OpDispatchBuilder::CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Res, Or // Signed CalculateOF(SrcSize, Res, Src1, Src2, false); } + + CalculatePF(Res); + return Res; } void OpDispatchBuilder::CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) { diff --git a/FEXCore/Source/Interface/IR/IR.json b/FEXCore/Source/Interface/IR/IR.json index b3a6fdac6..39d870038 100644 --- a/FEXCore/Source/Interface/IR/IR.json +++ b/FEXCore/Source/Interface/IR/IR.json @@ -951,6 +951,15 @@ "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit" ] }, + "GPR = Adc OpSize:#Size, GPR:$Src1, GPR:$Src2": { + "Desc": [ "Integer Add with carry", + "Will truncate to 64 or 32bits" + ], + "DestSize": "Size", + "EmitValidation": [ + "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit" + ] + }, "GPR = AddShift OpSize:#Size, GPR:$Src1, GPR:$Src2, ShiftType:$Shift{ShiftType::LSL}, u8:$ShiftAmount{0}": { "Desc": [ "Integer Add with shifted register", "Will truncate to 64 or 32bits" @@ -994,6 +1003,14 @@ "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit" ] }, + "GPR = AdcWithFlags OpSize:#Size, GPR:$Src1, GPR:$Src2": { + "Desc": ["Adds and sets NZCV for the sum of two GPRs and carry-in given as NZCV"], + "HasSideEffects": true, + "DestSize": "Size", + "EmitValidation": [ + "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit" + ] + }, "AdcNZCV OpSize:#Size, GPR:$Src1, GPR:$Src2": { "Desc": ["Set NZCV for the sum of two GPRs 
and carry-in given as NZCV"], "HasSideEffects": true, diff --git a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp index b59fc7da9..4ad84056c 100644 --- a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp +++ b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp @@ -995,6 +995,22 @@ bool ConstProp::ConstantInlining(IREmitter *IREmit, const IRListView& CurrentIR) break; } + case OP_ADC: + case OP_ADCWITHFLAGS: + { + auto Op = IROp->C<IR::IROp_Adc>(); + + uint64_t Constant1{}; + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1)) { + if (Constant1 == 0) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[0])); + IREmit->ReplaceNodeArgument(CodeNode, 0, CreateInlineConstant(IREmit, 0)); + Changed = true; + } + } + + break; + } case OP_CONDADDNZCV: { auto Op = IROp->C<IR::IROp_CondAddNZCV>(); diff --git a/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp b/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp index 3c2fcdff2..b0cd435d0 100644 --- a/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp +++ b/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp @@ -142,6 +142,14 @@ DeadFlagCalculationEliminination::Classify(IROp_Header *IROp) .Replacement = OP_SUB, }; + case OP_ADCWITHFLAGS: + return { + .Read = FLAG_C, + .Write = FLAG_NZCV, + .CanReplace = true, + .Replacement = OP_ADC, + }; + case OP_ADDNZCV: case OP_SUBNZCV: case OP_TESTNZ: @@ -170,6 +178,9 @@ DeadFlagCalculationEliminination::Classify(IROp_Header *IROp) case OP_LOADNZCV: return {.Read = FLAG_NZCV}; + case OP_ADC: + return {.Read = FLAG_C}; + case OP_ADCNZCV: case OP_SBBNZCV: return {