IR: add ShiftFlags op

Generates flags for a variable shift as a dedicated IR op. This lets us optimize
around it (without generating control flow, relying on deferred flag infra,
etc.). And it neatly solves our RA problem for shifts.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
Alyssa Rosenzweig 2024-04-02 11:21:21 -04:00
parent a99c48b7a3
commit 982391ba9d
3 changed files with 95 additions and 0 deletions

View File

@ -863,6 +863,84 @@ DEF_OP(Ashr) {
}
}
// Emits the NZCV and raw-PF update for a variable-count shift whose result has
// already been computed.
//
// Operands read from the IR op:
//   Result  - the shifted value (Dst below)
//   Src1    - the value that was shifted
//   Src2    - the shift count
//   PFInput - the raw PF value going in
//
// The register assigned to this node (PFOutput) receives the raw PF: PFInput
// unchanged when the count register is zero, otherwise the shift result. NZCV
// is likewise only touched when the count is nonzero.
DEF_OP(ShiftFlags) {
  auto Op = IROp->C<IR::IROp_ShiftFlags>();
  const uint8_t OpSize = Op->Size;
  const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;

  const auto PFOutput = GetReg(Node);
  const auto PFInput = GetReg(Op->PFInput.ID());
  const auto Dst = GetReg(Op->Result.ID());
  const auto Src1 = GetReg(Op->Src1.ID());
  const auto Src2 = GetReg(Op->Src2.ID());

  // If the output register aliases any operand that is still read below, all
  // PF writes must go through a scratch register and be copied back at the
  // very end; otherwise we would clobber an input before it is consumed.
  const bool PFBlocked = (PFOutput == Dst) || (PFOutput == Src1) || (PFOutput == Src2);
  const auto PFTemp = PFBlocked ? TMP4 : PFOutput;

  // Set the output outside the branch to avoid needing an extra leg of the
  // branch. We specifically do not hardcode the PF register anywhere (relying
  // on a tied SRA register instead) to avoid fighting with RA/RCLSE.
  if (PFTemp != PFInput) {
    mov(ARMEmitter::Size::i64Bit, PFTemp, PFInput);
  }

  // A zero shift count leaves every flag untouched, so skip the whole update.
  ARMEmitter::SingleUseForwardLabel Done;
  cbz(EmitSize, Src2, &Done);
  {
    // PF/SF/ZF/OF
    // The new raw PF is written to PFTemp (never directly to PFOutput) so that
    // Src1/Src2/Dst stay intact when PFOutput aliases one of them.
    if (OpSize >= 4) {
      ands(EmitSize, PFTemp, Dst, Dst);
    } else {
      // Shift the narrow result up so its top bit lands in bit 31, making the
      // N/Z flags reflect the OpSize-wide value.
      unsigned Shift = 32 - (OpSize * 8);
      cmn(EmitSize, ARMEmitter::Reg::zr, Dst, ARMEmitter::ShiftType::LSL, Shift);
      mov(ARMEmitter::Size::i64Bit, PFTemp, Dst);
    }

    // Compute the position within Src1 of the last bit shifted out, then move
    // that bit down to bit 0 of TMP1; it becomes CF.
    if (Op->Shift == IR::ShiftType::LSL) {
      if (OpSize >= 4) {
        // lsrv uses the shift amount modulo the register width, so -count
        // behaves as (width - count).
        neg(EmitSize, TMP1, Src2);
      } else {
        mov(EmitSize, TMP1, OpSize * 8);
        sub(EmitSize, TMP1, TMP1, Src2);
      }
    } else {
      sub(ARMEmitter::Size::i64Bit, TMP1, Src2, 1);
    }

    lsrv(EmitSize, TMP1, Src1, TMP1);

    const bool SetOF = Op->Shift != IR::ShiftType::ASR;
    if (SetOF) {
      // Only defined when Shift is 1 else undefined
      // OF flag is set if a sign change occurred
      eor(EmitSize, TMP3, Src1, Dst);
    }

    if (CTX->HostFeatures.SupportsFlagM) {
      // Rotating right by 63 places bit 0 of TMP1 at bit 1, the C slot of the
      // 4-bit field rmif inserts into NZCV.
      rmif(TMP1, 63, (1 << 1) /* C */);

      if (SetOF) {
        // Rotate the sign bit of the xor down to bit 0, the V slot.
        rmif(TMP3, OpSize * 8 - 1, (1 << 0) /* V */);
      }
    } else {
      // No FlagM: read-modify-write NZCV through a general-purpose register.
      mrs(TMP2, ARMEmitter::SystemRegister::NZCV);
      bfi(ARMEmitter::Size::i32Bit, TMP2, TMP1, 29 /* C */, 1);

      if (SetOF) {
        lsr(EmitSize, TMP3, TMP3, OpSize * 8 - 1);
        bfi(ARMEmitter::Size::i32Bit, TMP2, TMP3, 28 /* V */, 1);
      }

      msr(ARMEmitter::SystemRegister::NZCV, TMP2);
    }
  }

  // Bind before the copy-back: the zero-count path must also deliver PFInput
  // (held in PFTemp) to PFOutput when a scratch register was used.
  Bind(&Done);

  // TODO: Make RA less dumb so this can't happen (e.g. with late-kill).
  if (PFBlocked) {
    mov(ARMEmitter::Size::i64Bit, PFOutput, PFTemp);
  }
}
DEF_OP(Ror) {
auto Op = IROp->C<IR::IROp_Ror>();
const uint8_t OpSize = IROp->Size;

View File

@ -1218,6 +1218,12 @@
"Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
]
},
"GPR = ShiftFlags OpSize:$Size, GPR:$Result, GPR:$Src1, ShiftType:$Shift, GPR:$Src2, GPR:$PFInput": {
"Desc": ["Set NZCV flags for specified variable integer shift with given result.",
"Returns updated raw PF."],
"HasSideEffects": true,
"DestSize": "8"
},
"GPR = Ror OpSize:#Size, GPR:$Src1, GPR:$Src2": {
"Desc": ["Integer rotate right"
],

View File

@ -157,6 +157,17 @@ DeadFlagCalculationEliminination::Classify(IROp_Header *IROp)
.Replacement = OP_SBB,
};
case OP_SHIFTFLAGS:
// _ShiftFlags conditionally sets NZCV+PF, which we model here as a
// read-modify-write. Logically, it also conditionally makes AF undefined,
// which we model by omitting AF from both Read and Write sets (since
// "cond ? AF : undef" may be optimized to "AF").
return {
.Read = FLAG_NZCV | FLAG_P,
.Write = FLAG_NZCV | FLAG_P,
.CanEliminate = true,
};
case OP_ADDNZCV:
case OP_SUBNZCV:
case OP_TESTNZ: