mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-12-12 16:46:23 +00:00
Arm64: Implement support for NT Loads with ASIMD fallback
This commit is contained in:
parent
c9efb75714
commit
4c21aa2604
@ -2320,5 +2320,31 @@ DEF_OP(VStoreNonTemporalPair) {
|
||||
stnp(ValueLow.Q(), ValueHigh.Q(), MemReg, Offset);
|
||||
}
|
||||
|
||||
// Emits the host code for the VLoadNonTemporal IR op: loads a 128-bit or
// 256-bit vector from [Addr + Offset] into the destination vector register.
//
// Three code paths, chosen from the op size and host feature flags:
//   * 256-bit:            SVE ldnt1b with the 32-byte predicate (asserts SVE256).
//   * 128-bit with SVE:   SVE ldnt1b with the 16-byte predicate.
//   * otherwise:          plain ASIMD ldr of a Q register (no non-temporal hint).
DEF_OP(VLoadNonTemporal) {
  const auto Op = IROp->C<IR::IROp_VLoadNonTemporal>();
  const auto OpSize = IROp->Size;

  const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
  const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE;

  const auto Dst = GetVReg(Node);
  const auto MemReg = GetReg(Op->Addr.ID());
  const auto Offset = Op->Offset;

  if (Is256Bit) {
    LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VLoadNonTemporal with 256-bit operation");
    const auto GoverningPredicate = PRED_TMP_32B.Zeroing();
    // The byte offset is converted to a count of 32-byte registers before
    // being handed to ldnt1b.
    const auto OffsetScaled = Offset / 32;
    ldnt1b(Dst.Z(), GoverningPredicate, MemReg, OffsetScaled);
  } else if (Is128Bit && HostSupportsSVE128) {
    const auto GoverningPredicate = PRED_TMP_16B.Zeroing();
    // Same scaling as above, in units of 16-byte registers.
    const auto OffsetScaled = Offset / 16;
    ldnt1b(Dst.Z(), GoverningPredicate, MemReg, OffsetScaled);
  } else {
    // Treat the non-temporal load as a regular vector load in this case for compatibility
    ldr(Dst.Q(), MemReg, Offset);
  }
}
|
||||
|
||||
#undef DEF_OP
|
||||
} // namespace FEXCore::CPU
|
||||
|
@ -694,6 +694,18 @@
|
||||
"_Offset % RegisterSize == 0",
|
||||
"RegisterSize == FEXCore::IR::OpSize::i128Bit"
|
||||
]
|
||||
},
|
||||
"FPR = VLoadNonTemporal u8:#RegisterSize, GPR:$Addr, i8:$Offset": {
|
||||
"Desc": ["Does a non-temporal memory load of a vector.",
|
||||
"Matches arm64 SVE ldnt1b semantics.",
|
||||
"Specifically weak-memory model ordered to match x86 non-temporal loads."
|
||||
],
|
||||
"HasSideEffects": true,
|
||||
"DestSize": "RegisterSize",
|
||||
"EmitValidation": [
|
||||
"_Offset % RegisterSize == 0",
|
||||
"RegisterSize == FEXCore::IR::OpSize::i128Bit || RegisterSize == FEXCore::IR::OpSize::i256Bit"
|
||||
]
|
||||
}
|
||||
},
|
||||
"Atomic": {
|
||||
|
Loading…
Reference in New Issue
Block a user