diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp
index 84f00b923..f10ba4cbe 100644
--- a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp
+++ b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp
@@ -863,6 +863,98 @@ DEF_OP(Ashr) {
   }
 }
 
+// Emits the NZCV update for a variable-count shift whose result (Result) has
+// already been computed from Src1 and the count in Src2, and produces the new
+// raw PF value. A zero count leaves NZCV alone and passes PFInput through.
+DEF_OP(ShiftFlags) {
+  auto Op = IROp->C();
+  const uint8_t OpSize = Op->Size;
+  const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
+
+  const auto PFOutput = GetReg(Node);
+  const auto PFInput = GetReg(Op->PFInput.ID());
+  const auto Dst = GetReg(Op->Result.ID());
+  const auto Src1 = GetReg(Op->Src1.ID());
+  const auto Src2 = GetReg(Op->Src2.ID());
+
+  // The PF output may share a register with an operand that is still read
+  // below. In that case the new PF is built in TMP4 and only copied into
+  // PFOutput once every operand has been consumed.
+  const bool PFBlocked = (PFOutput == Dst) || (PFOutput == Src1) || (PFOutput == Src2);
+  const auto PFTemp = PFBlocked ? TMP4 : PFOutput;
+
+  // Set the output outside the branch to avoid needing an extra leg of the
+  // branch. We specifically do not hardcode the PF register anywhere (relying
+  // on a tied SRA register instead) to avoid fighting with RA/RCLSE.
+  if (PFTemp != PFInput)
+    mov(ARMEmitter::Size::i64Bit, PFTemp, PFInput);
+
+  // Skip the whole flag update when the shift count is zero.
+  ARMEmitter::SingleUseForwardLabel Done;
+  cbz(EmitSize, Src2, &Done);
+  {
+    // PF/SF/ZF/OF. The raw PF goes to PFTemp rather than PFOutput so that an
+    // aliased Src1/Src2 is still intact for the CF/OF extraction below.
+    if (OpSize >= 4) {
+      ands(EmitSize, PFTemp, Dst, Dst);
+    } else {
+      // Move the sub-32-bit result to the top of the register so N/Z are taken
+      // from its own sign bit and width.
+      unsigned Shift = 32 - (OpSize * 8);
+      cmn(EmitSize, ARMEmitter::Reg::zr, Dst, ARMEmitter::ShiftType::LSL, Shift);
+      mov(ARMEmitter::Size::i64Bit, PFTemp, Dst);
+    }
+
+    // Extract the last bit shifted in to CF
+    // Left shifts: bit (size - count) of Src1. Right shifts: bit (count - 1).
+    // TMP1 first holds that shift amount, then the wanted bit in bit 0.
+    if (Op->Shift == IR::ShiftType::LSL) {
+      if (OpSize >= 4) {
+        // LSRV takes the count modulo the register width, so -count works.
+        neg(EmitSize, TMP1, Src2);
+      } else {
+        mov(EmitSize, TMP1, OpSize * 8);
+        sub(EmitSize, TMP1, TMP1, Src2);
+      }
+    } else {
+      sub(ARMEmitter::Size::i64Bit, TMP1, Src2, 1);
+    }
+
+    lsrv(EmitSize, TMP1, Src1, TMP1);
+
+    bool SetOF = Op->Shift != IR::ShiftType::ASR;
+    if (SetOF) {
+      // Only defined when Shift is 1 else undefined
+      // OF flag is set if a sign change occurred
+      eor(EmitSize, TMP3, Src1, Dst);
+    }
+
+    if (CTX->HostFeatures.SupportsFlagM) {
+      rmif(TMP1, 63, (1 << 1) /* C */);
+
+      if (SetOF)
+        rmif(TMP3, OpSize * 8 - 1, (1 << 0) /* V */);
+    } else {
+      mrs(TMP2, ARMEmitter::SystemRegister::NZCV);
+      bfi(ARMEmitter::Size::i32Bit, TMP2, TMP1, 29 /* C */, 1);
+
+      if (SetOF) {
+        lsr(EmitSize, TMP3, TMP3, OpSize * 8 - 1);
+        bfi(ARMEmitter::Size::i32Bit, TMP2, TMP3, 28 /* V */, 1);
+      }
+
+      msr(ARMEmitter::SystemRegister::NZCV, TMP2);
+    }
+  }
+
+  Bind(&Done);
+
+  // TODO: Make RA less dumb so this can't happen (e.g. with late-kill).
+  // This copy sits after the label so the zero-count path publishes PFInput too.
+  if (PFBlocked)
+    mov(ARMEmitter::Size::i64Bit, PFOutput, PFTemp);
+}
+
 DEF_OP(Ror) {
   auto Op = IROp->C();
   const uint8_t OpSize = IROp->Size;
diff --git a/FEXCore/Source/Interface/IR/IR.json b/FEXCore/Source/Interface/IR/IR.json
index 0e8cbef9f..e8542f76e 100644
--- a/FEXCore/Source/Interface/IR/IR.json
+++ b/FEXCore/Source/Interface/IR/IR.json
@@ -1218,6 +1218,12 @@
           "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
         ]
       },
+      "GPR = ShiftFlags OpSize:$Size, GPR:$Result, GPR:$Src1, ShiftType:$Shift, GPR:$Src2, GPR:$PFInput": {
+        "Desc": ["Set NZCV flags for specified variable integer shift with given result.",
+                 "Returns updated raw PF."],
+        "HasSideEffects": true,
+        "DestSize": "8"
+      },
       "GPR = Ror OpSize:#Size, GPR:$Src1, GPR:$Src2": {
         "Desc": ["Integer rotate right"
         ],
diff --git a/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp b/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp
index 70071cc37..4248d08ae 100644
--- a/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp
+++ b/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp
@@ -157,6 +157,17 @@ DeadFlagCalculationEliminination::Classify(IROp_Header *IROp)
         .Replacement = OP_SBB,
       };
 
+    case OP_SHIFTFLAGS:
+      // _ShiftFlags conditionally sets NZCV+PF, which we model here as a
+      // read-modify-write. Logically, it also conditionally makes AF undefined,
+      // which we model by omitting AF from both Read and Write sets (since
+      // "cond ? AF : undef" may be optimized to "AF").
+      return {
+        .Read = FLAG_NZCV | FLAG_P,
+        .Write = FLAG_NZCV | FLAG_P,
+        .CanEliminate = true,
+      };
+
     case OP_ADDNZCV:
     case OP_SUBNZCV:
     case OP_TESTNZ: