mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-01-20 21:37:20 +00:00
IR: add ShiftFlags op
Generates flags for a variable shift as a dedicated IR op. This lets us optimize around it (without generating control flow, relying on deferred flag infra, etc). And it neatly solves our RA problem for shifts. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
parent
a99c48b7a3
commit
982391ba9d
@ -863,6 +863,84 @@ DEF_OP(Ashr) {
|
||||
}
|
||||
}
|
||||
|
||||
// Emits the flag update for a variable-count integer shift whose result has
// already been computed (Op->Result). NZCV is updated and the new raw PF value
// is produced in this op's destination register. If the shift count (Src2) is
// zero, NZCV is left untouched and PFInput is passed through unchanged.
DEF_OP(ShiftFlags) {
  auto Op = IROp->C<IR::IROp_ShiftFlags>();
  const uint8_t OpSize = Op->Size;
  const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;

  const auto PFOutput = GetReg(Node);
  const auto PFInput = GetReg(Op->PFInput.ID());
  const auto Dst = GetReg(Op->Result.ID());
  const auto Src1 = GetReg(Op->Src1.ID());
  const auto Src2 = GetReg(Op->Src2.ID());

  // If the destination aliases any value that is still read below, all PF
  // writes must be staged in a temporary and copied out only at the very end.
  const bool PFBlocked = (PFOutput == Dst) || (PFOutput == Src1) || (PFOutput == Src2);
  const auto PFTemp = PFBlocked ? TMP4 : PFOutput;

  // Set the output outside the branch to avoid needing an extra leg of the
  // branch. We specifically do not hardcode the PF register anywhere (relying
  // on a tied SRA register instead) to avoid fighting with RA/RCLSE.
  if (PFTemp != PFInput)
    mov(ARMEmitter::Size::i64Bit, PFTemp, PFInput);

  // A zero shift count leaves every flag unchanged, so skip the whole update.
  ARMEmitter::SingleUseForwardLabel Done;
  cbz(EmitSize, Src2, &Done);
  {
    // PF/SF/ZF/OF
    //
    // Note: these must target PFTemp, not PFOutput. When PFBlocked is set,
    // PFOutput aliases Dst/Src1/Src2, which are still consumed below.
    if (OpSize >= 4) {
      ands(EmitSize, PFTemp, Dst, Dst);
    } else {
      // Move the narrow result to the top of the 32-bit register so that the
      // flag-setting add sees the OpSize-wide sign bit and zero-ness.
      unsigned Shift = 32 - (OpSize * 8);
      cmn(EmitSize, ARMEmitter::Reg::zr, Dst, ARMEmitter::ShiftType::LSL, Shift);
      mov(ARMEmitter::Size::i64Bit, PFTemp, Dst);
    }

    // Extract the last bit shifted out into CF. TMP1 first receives the index
    // of that bit within Src1, then the source shifted so the bit lands in
    // bit 0.
    if (Op->Shift == IR::ShiftType::LSL) {
      if (OpSize >= 4) {
        // Index is (width - count); the variable shift only uses the count
        // modulo the register width, so a plain negate is sufficient.
        neg(EmitSize, TMP1, Src2);
      } else {
        mov(EmitSize, TMP1, OpSize * 8);
        sub(EmitSize, TMP1, TMP1, Src2);
      }
    } else {
      // Right shifts: the last bit out is bit (count - 1).
      sub(ARMEmitter::Size::i64Bit, TMP1, Src2, 1);
    }

    lsrv(EmitSize, TMP1, Src1, TMP1);

    const bool SetOF = Op->Shift != IR::ShiftType::ASR;
    if (SetOF) {
      // Only defined when Shift is 1 else undefined
      // OF flag is set if a sign change occurred
      eor(EmitSize, TMP3, Src1, Dst);
    }

    if (CTX->HostFeatures.SupportsFlagM) {
      // Rotate bit 0 of TMP1 into the C position and insert only C.
      rmif(TMP1, 63, (1 << 1) /* C */);

      // Rotate the OpSize-wide sign bit of TMP3 into the V position.
      if (SetOF)
        rmif(TMP3, OpSize * 8 - 1, (1 << 0) /* V */);
    } else {
      // No FlagM: read-modify-write NZCV through a GPR.
      mrs(TMP2, ARMEmitter::SystemRegister::NZCV);
      bfi(ARMEmitter::Size::i32Bit, TMP2, TMP1, 29 /* C */, 1);

      if (SetOF) {
        lsr(EmitSize, TMP3, TMP3, OpSize * 8 - 1);
        bfi(ARMEmitter::Size::i32Bit, TMP2, TMP3, 28 /* V */, 1);
      }

      msr(ARMEmitter::SystemRegister::NZCV, TMP2);
    }
  }

  Bind(&Done);

  // Copy the staged PF into the real destination. This has to happen after the
  // label so the zero-shift path (which jumps straight to Done) also forwards
  // PFInput into PFOutput.
  // TODO: Make RA less dumb so this can't happen (e.g. with late-kill).
  if (PFBlocked)
    mov(ARMEmitter::Size::i64Bit, PFOutput, PFTemp);
}
|
||||
|
||||
DEF_OP(Ror) {
|
||||
auto Op = IROp->C<IR::IROp_Ror>();
|
||||
const uint8_t OpSize = IROp->Size;
|
||||
|
@ -1218,6 +1218,12 @@
|
||||
"Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
|
||||
]
|
||||
},
|
||||
"GPR = ShiftFlags OpSize:$Size, GPR:$Result, GPR:$Src1, ShiftType:$Shift, GPR:$Src2, GPR:$PFInput": {
|
||||
"Desc": ["Set NZCV flags for specified variable integer shift with given result.",
|
||||
"Returns updated raw PF."],
|
||||
"HasSideEffects": true,
|
||||
"DestSize": "8"
|
||||
},
|
||||
"GPR = Ror OpSize:#Size, GPR:$Src1, GPR:$Src2": {
|
||||
"Desc": ["Integer rotate right"
|
||||
],
|
||||
|
@ -157,6 +157,17 @@ DeadFlagCalculationEliminination::Classify(IROp_Header *IROp)
|
||||
.Replacement = OP_SBB,
|
||||
};
|
||||
|
||||
case OP_SHIFTFLAGS:
|
||||
// _ShiftFlags conditionally sets NZCV+PF, which we model here as a
|
||||
// read-modify-write. Logically, it also conditionally makes AF undefined,
|
||||
// which we model by omitting AF from both Read and Write sets (since
|
||||
// "cond ? AF : undef" may be optimized to "AF").
|
||||
return {
|
||||
.Read = FLAG_NZCV | FLAG_P,
|
||||
.Write = FLAG_NZCV | FLAG_P,
|
||||
.CanEliminate = true,
|
||||
};
|
||||
|
||||
case OP_ADDNZCV:
|
||||
case OP_SUBNZCV:
|
||||
case OP_TESTNZ:
|
||||
|
Loading…
x
Reference in New Issue
Block a user