OpcodeDispatcher: optimize vcvtps2ph

We can avoid a LOT of pointless work with some dedicated IR ops for specifically
overriding the round mode.

Small behaviour change here: we no longer reset FTZ. I think this is a bug fix?
But if it's not, it's not hard to fix.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
Alyssa Rosenzweig 2024-06-26 16:30:35 -04:00
parent d1d41f5645
commit d2324f4a93
3 changed files with 46 additions and 5 deletions

View File

@ -120,6 +120,39 @@ DEF_OP(SetRoundingMode) {
msr(ARMEmitter::SystemRegister::FPCR, TMP1);
}
DEF_OP(PushRoundingMode) {
  // Emits code that copies the current FPCR into this node's destination
  // register and then installs a new FPCR whose bits 22-23 encode the
  // compile-time constant Op->RoundMode. The saved value is what
  // PopRoundingMode later writes back.
  auto Op = IROp->C<IR::IROp_PushRoundingMode>();
  auto SavedFPCR = GetReg(Node);

  // Position and mask of the two rounding bits inside FPCR.
  constexpr int RModeShift = 22;
  constexpr int RModeMask = 3 << RModeShift;

  // Always hand the previous FPCR back to the caller, even when we end up
  // not changing anything below.
  mrs(SavedFPCR, ARMEmitter::SystemRegister::FPCR);

  // The vixl simulator only supports ties-to-even rounding, so the FPCR is
  // only rewritten when vixl indirect calls are disabled.
  if (CTX->Config.DisableVixlIndirectCalls) [[likely]] {
    // The incoming 2-bit mode has to be bit-reversed before it is placed in
    // the FPCR field. The new FPCR value is assembled in TMP1.
    switch (Op->RoundMode) {
    case 0:
      // 0b00 reversed is still 0b00: just clear the field.
      and_(ARMEmitter::Size::i64Bit, TMP1, SavedFPCR, ~RModeMask);
      break;
    case 3:
      // 0b11 reversed is still 0b11: just set both bits.
      orr(ARMEmitter::Size::i64Bit, TMP1, SavedFPCR, RModeMask);
      break;
    default:
      LOGMAN_THROW_AA_FMT(Op->RoundMode == 1 || Op->RoundMode == 2, "expect a valid round mode");

      // Modes 1 and 2 each have exactly one bit set. Clearing that bit and
      // then setting the opposite one yields the reversed encoding.
      and_(ARMEmitter::Size::i64Bit, TMP1, SavedFPCR, ~(Op->RoundMode << RModeShift));
      orr(ARMEmitter::Size::i64Bit, TMP1, TMP1, (Op->RoundMode == 2 ? 1 : 2) << RModeShift);
      break;
    }

    // Activate the overridden rounding mode.
    msr(ARMEmitter::SystemRegister::FPCR, TMP1);
  }
}
DEF_OP(PopRoundingMode) {
  // Emits a single write of the op's FPCR argument (the value handed out by
  // a PushRoundingMode) back into the FPCR system register.
  auto Op = IROp->C<IR::IROp_PopRoundingMode>();
  auto SavedFPCR = GetReg(Op->FPCR.ID());

  msr(ARMEmitter::SystemRegister::FPCR, SavedFPCR);
}
DEF_OP(Print) {
auto Op = IROp->C<IR::IROp_Print>();

View File

@ -4493,13 +4493,10 @@ void OpDispatchBuilder::VCVTPS2PHOp(OpcodeArgs) {
// the RM field in the FPCR. And so! We have to do some ugly
// rounding mode shuffling.
const auto NewRMode = Imm8 & 0b11;
Ref OldRMode = _GetRoundingMode();
_SetRoundingMode(_Constant(NewRMode));
Ref SavedFPCR = _PushRoundingMode(NewRMode);
Result = _Vector_FToF(SrcSize, 2, Src, 4);
_SetRoundingMode(OldRMode);
_PopRoundingMode(SavedFPCR);
}
// We need to eliminate upper junk if we're storing into a register with

View File

@ -231,6 +231,17 @@
],
"HasSideEffects": true
},
"GPR = PushRoundingMode u8:$RoundMode": {
"Desc": ["Override the current rounding mode options for the thread, returning old FPCR"
],
"DestSize": "8",
"HasSideEffects": true
},
"PopRoundingMode GPR:$FPCR": {
"Desc": ["Resets rounding mode after PushRoundingMode operation"
],
"HasSideEffects": true
},
"Print SSA:$Value": {
"HasSideEffects": true,
"Desc": ["Debug operation that prints an SSA value to the console",