mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-01-22 06:20:58 +00:00
Arm64: Implements support for DAZ using AFP.FIZ
When AFP is supported, we can actually support DAZ. This might also fix the audio corruption in Animal Well, but I can't test it until Steam is running on Oryon. This requires a bit of plumbing for MXCSR, which we were hacking around before; now we actually want to store the value. Fixes #3856
This commit is contained in:
parent
54fc8cb0bd
commit
b78da2e5ad
@ -575,7 +575,7 @@ void Arm64Emitter::SpillStaticRegs(ARMEmitter::Register TmpReg, bool FPRs, uint3
|
||||
if (EmitterCTX->HostFeatures.SupportsAFP) {
|
||||
// Disable AFP features when spilling registers.
|
||||
//
|
||||
// Disable FPCR.NEP and FPCR.AH
|
||||
// Disable FPCR.NEP and FPCR.AH and FPCR.FIZ
|
||||
// NEP(2): Changes ASIMD scalar instructions to insert in to the lower bits of the destination.
|
||||
// AH(1): Changes NaN behaviour in some instructions. Specifically fmin, fmax.
|
||||
// Also interacts with RPRES to change reciprocal/rsqrt precision from 8-bit mantissa to 12-bit.
|
||||
@ -585,7 +585,8 @@ void Arm64Emitter::SpillStaticRegs(ARMEmitter::Register TmpReg, bool FPRs, uint3
|
||||
mrs(TmpReg, ARMEmitter::SystemRegister::FPCR);
|
||||
bic(ARMEmitter::Size::i64Bit, TmpReg, TmpReg,
|
||||
(1U << 2) | // NEP
|
||||
(1U << 1)); // AH
|
||||
(1U << 1) | // AH
|
||||
(1U << 0)); // FIZ
|
||||
msr(ARMEmitter::SystemRegister::FPCR, TmpReg);
|
||||
}
|
||||
#endif
|
||||
@ -664,18 +665,24 @@ void Arm64Emitter::SpillStaticRegs(ARMEmitter::Register TmpReg, bool FPRs, uint3
|
||||
}
|
||||
|
||||
void Arm64Emitter::FillStaticRegs(bool FPRs, uint32_t GPRFillMask, uint32_t FPRFillMask) {
|
||||
ARMEmitter::Register TmpReg = ARMEmitter::Reg::r0;
|
||||
LOGMAN_THROW_A_FMT(GPRFillMask != 0, "Must fill at least 1 GPR for a temp");
|
||||
[[maybe_unused]] bool FoundRegister {};
|
||||
for (auto Reg : StaticRegisters) {
|
||||
if (((1U << Reg.Idx()) & GPRFillMask)) {
|
||||
TmpReg = Reg;
|
||||
FoundRegister = true;
|
||||
break;
|
||||
auto FindTempReg = [this](uint32_t* GPRFillMask) -> std::optional<ARMEmitter::Register> {
|
||||
for (auto Reg : StaticRegisters) {
|
||||
if (((1U << Reg.Idx()) & *GPRFillMask)) {
|
||||
*GPRFillMask &= ~(1U << Reg.Idx());
|
||||
return std::make_optional(Reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
LOGMAN_THROW_A_FMT(FoundRegister, "Didn't have an SRA register to use as a temporary while spilling!");
|
||||
LOGMAN_THROW_A_FMT(GPRFillMask != 0, "Must fill at least 2 GPRs for a temp");
|
||||
uint32_t TempGPRFillMask = GPRFillMask;
|
||||
auto Reg = FindTempReg(&TempGPRFillMask);
|
||||
auto Reg2 = FindTempReg(&TempGPRFillMask);
|
||||
LOGMAN_THROW_A_FMT(Reg.has_value() && Reg2.has_value(), "Didn't have an SRA register to use as a temporary while spilling!");
|
||||
|
||||
auto TmpReg = *Reg;
|
||||
[[maybe_unused]] auto TmpReg2 = *Reg2;
|
||||
|
||||
#ifndef VIXL_SIMULATOR
|
||||
if (EmitterCTX->HostFeatures.SupportsAFP) {
|
||||
@ -692,6 +699,11 @@ void Arm64Emitter::FillStaticRegs(bool FPRs, uint32_t GPRFillMask, uint32_t FPRF
|
||||
orr(ARMEmitter::Size::i64Bit, TmpReg, TmpReg,
|
||||
(1U << 2) | // NEP
|
||||
(1U << 1)); // AH
|
||||
|
||||
// Insert MXCSR.DAZ into FPCR.FIZ
|
||||
ldr(TmpReg2.W(), STATE.R(), offsetof(FEXCore::Core::CPUState, mxcsr));
|
||||
bfxil(ARMEmitter::Size::i64Bit, TmpReg, TmpReg2, 6, 1);
|
||||
|
||||
msr(ARMEmitter::SystemRegister::FPCR, TmpReg);
|
||||
}
|
||||
#endif
|
||||
|
@ -98,6 +98,7 @@ DEF_OP(GetRoundingMode) {
|
||||
DEF_OP(SetRoundingMode) {
|
||||
auto Op = IROp->C<IR::IROp_SetRoundingMode>();
|
||||
auto Src = GetReg(Op->RoundMode.ID());
|
||||
auto MXCSR = GetReg(Op->MXCSR.ID());
|
||||
|
||||
// As above, setup the rounding flags in [31:30]
|
||||
rbit(ARMEmitter::Size::i32Bit, TMP2, Src);
|
||||
@ -116,6 +117,11 @@ DEF_OP(SetRoundingMode) {
|
||||
lsr(ARMEmitter::Size::i64Bit, TMP2, Src, 2);
|
||||
bfi(ARMEmitter::Size::i64Bit, TMP1, TMP2, 24, 1);
|
||||
|
||||
if (Op->SetDAZ && HostSupportsAFP) {
|
||||
// Extract DAZ from MXCSR and insert it into FPCR.FIZ
|
||||
bfxil(ARMEmitter::Size::i64Bit, TMP1, MXCSR, 6, 1);
|
||||
}
|
||||
|
||||
// Now save the new FPCR
|
||||
msr(ARMEmitter::SystemRegister::FPCR, TMP1);
|
||||
}
|
||||
|
@ -2599,10 +2599,11 @@ void OpDispatchBuilder::SaveAVXState(Ref MemBase) {
|
||||
}
|
||||
|
||||
Ref OpDispatchBuilder::GetMXCSR() {
|
||||
// Default MXCSR Value
|
||||
Ref MXCSR = _Constant(0x1F80);
|
||||
Ref RoundingMode = _GetRoundingMode();
|
||||
return _Bfi(OpSize::i32Bit, 3, 13, MXCSR, RoundingMode);
|
||||
Ref MXCSR = _LoadContext(OpSize::i32Bit, GPRClass, offsetof(FEXCore::Core::CPUState, mxcsr));
|
||||
// Mask out unsupported bits
|
||||
// Keeps FZ, RC, exception masks, and DAZ
|
||||
MXCSR = _And(OpSize::i32Bit, MXCSR, _Constant(0xFFC0));
|
||||
return MXCSR;
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::FXRStoreOp(OpcodeArgs) {
|
||||
@ -2711,9 +2712,13 @@ void OpDispatchBuilder::RestoreSSEState(Ref MemBase) {
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::RestoreMXCSRState(Ref MXCSR) {
|
||||
// Mask out unsupported bits
|
||||
MXCSR = _And(OpSize::i32Bit, MXCSR, _Constant(0xFFC0));
|
||||
|
||||
_StoreContext(OpSize::i32Bit, GPRClass, MXCSR, offsetof(FEXCore::Core::CPUState, mxcsr));
|
||||
// We only support the rounding mode and FTZ bit being set
|
||||
Ref RoundingMode = _Bfe(OpSize::i32Bit, 3, 13, MXCSR);
|
||||
_SetRoundingMode(RoundingMode);
|
||||
_SetRoundingMode(RoundingMode, true, MXCSR);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::RestoreAVXState(Ref MemBase) {
|
||||
|
@ -48,7 +48,7 @@ void OpDispatchBuilder::FNINITF64(OpcodeArgs) {
|
||||
auto NewFCW = _Constant(16, 0x037F);
|
||||
// Init host rounding mode to zero
|
||||
auto Zero = _Constant(0);
|
||||
_SetRoundingMode(Zero);
|
||||
_SetRoundingMode(Zero, false, Zero);
|
||||
_StoreContext(2, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW));
|
||||
|
||||
// Init FSW to 0
|
||||
@ -71,7 +71,7 @@ void OpDispatchBuilder::X87LDENVF64(OpcodeArgs) {
|
||||
// ignore the rounding precision, we're always 64-bit in F64.
|
||||
// extract rounding mode
|
||||
Ref roundingMode = _Bfe(OpSize::i32Bit, 3, 10, NewFCW);
|
||||
_SetRoundingMode(roundingMode);
|
||||
_SetRoundingMode(roundingMode, false, roundingMode);
|
||||
_StoreContext(2, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW));
|
||||
|
||||
auto NewFSW = _LoadMem(GPRClass, Size, Mem, _Constant(Size * 1), Size, MEM_OFFSET_SXTX, 1);
|
||||
@ -89,7 +89,7 @@ void OpDispatchBuilder::X87FLDCWF64(OpcodeArgs) {
|
||||
// ignore the rounding precision, we're always 64-bit in F64.
|
||||
// extract rounding mode
|
||||
Ref roundingMode = _Bfe(OpSize::i32Bit, 3, 10, NewFCW);
|
||||
_SetRoundingMode(roundingMode);
|
||||
_SetRoundingMode(roundingMode, false, roundingMode);
|
||||
_StoreContext(2, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW));
|
||||
}
|
||||
|
||||
@ -783,7 +783,7 @@ void OpDispatchBuilder::X87FRSTORF64(OpcodeArgs) {
|
||||
auto roundMask = _Constant(3);
|
||||
roundingMode = _Lshr(OpSize::i32Bit, roundingMode, roundShift);
|
||||
roundingMode = _And(OpSize::i32Bit, roundingMode, roundMask);
|
||||
_SetRoundingMode(roundingMode);
|
||||
_SetRoundingMode(roundingMode, false, roundingMode);
|
||||
_StoreContext(2, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW));
|
||||
_StoreContext(2, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW));
|
||||
|
||||
|
@ -226,7 +226,7 @@
|
||||
"DestSize": "4"
|
||||
},
|
||||
|
||||
"SetRoundingMode GPR:$RoundMode": {
|
||||
"SetRoundingMode GPR:$RoundMode, i1:$SetDAZ, GPR:$MXCSR": {
|
||||
"Desc": ["Sets the current rounding mode options for the thread"
|
||||
],
|
||||
"HasSideEffects": true
|
||||
|
@ -104,7 +104,7 @@ struct CPUState {
|
||||
// Raw segment register indexes
|
||||
uint16_t es_idx {}, cs_idx {}, ss_idx {}, ds_idx {};
|
||||
uint16_t gs_idx {}, fs_idx {};
|
||||
uint16_t _pad2[2];
|
||||
uint32_t mxcsr {};
|
||||
|
||||
// Segment registers holding base addresses
|
||||
uint32_t es_cached {}, cs_cached {}, ss_cached {}, ds_cached {};
|
||||
@ -162,6 +162,10 @@ struct CPUState {
|
||||
// we encode DF as 1/-1 within the JIT, so we have to write 0x1 here to
|
||||
// zero DF.
|
||||
flags[X86State::RFLAG_DF_RAW_LOC] = 0x1;
|
||||
|
||||
// Default mxcsr value
|
||||
// All exception masks enabled.
|
||||
mxcsr = 0x1F80;
|
||||
}
|
||||
};
|
||||
static_assert(std::is_trivially_copyable_v<CPUState>, "Needs to be trivial");
|
||||
|
@ -2,7 +2,7 @@
|
||||
{
|
||||
"HostFeatures": ["AVX"],
|
||||
"RegData": {
|
||||
"RAX": "0xFF80"
|
||||
"RAX": "0xFFC0"
|
||||
},
|
||||
"MemoryRegions": {
|
||||
"0x100000000": "4096"
|
||||
|
Loading…
x
Reference in New Issue
Block a user