Arm64: Implement support for NT Loads with ASIMD fallback

This commit is contained in:
Ryan Houdek 2024-07-10 23:06:46 -07:00
parent c9efb75714
commit 4c21aa2604
No known key found for this signature in database
2 changed files with 38 additions and 0 deletions

View File

@ -2320,5 +2320,31 @@ DEF_OP(VStoreNonTemporalPair) {
stnp(ValueLow.Q(), ValueHigh.Q(), MemReg, Offset);
}
// Emits a non-temporal vector load from [Addr + Offset] into the destination vector register.
//
// Instruction selection depends on the operation size and host features:
//   - 256-bit:               SVE ldnt1b (asserts that the host supports SVE256).
//   - 128-bit with SVE128:   SVE ldnt1b.
//   - anything else:         plain ASIMD ldr of a Q register (no non-temporal hint).
DEF_OP(VLoadNonTemporal) {
  const auto Op = IROp->C<IR::IROp_VLoadNonTemporal>();
  const auto OpSize = IROp->Size;

  const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
  const auto Is128Bit = OpSize == Core::CPUState::XMM_SSE_REG_SIZE;

  const auto Dst = GetVReg(Node);
  const auto MemReg = GetReg(Op->Addr.ID());
  const auto Offset = Op->Offset;

  if (Is256Bit) {
    LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use VLoadNonTemporal with 256-bit operation");
    const auto GoverningPredicate = PRED_TMP_32B.Zeroing();
    // The byte offset is converted to a count of whole 32-byte registers before being handed to ldnt1b.
    const auto OffsetScaled = Offset / 32;
    ldnt1b(Dst.Z(), GoverningPredicate, MemReg, OffsetScaled);
  } else if (Is128Bit && HostSupportsSVE128) {
    const auto GoverningPredicate = PRED_TMP_16B.Zeroing();
    // The byte offset is converted to a count of whole 16-byte registers before being handed to ldnt1b.
    const auto OffsetScaled = Offset / 16;
    ldnt1b(Dst.Z(), GoverningPredicate, MemReg, OffsetScaled);
  } else {
    // Treat the non-temporal load as a regular vector load in this case for compatibility
    ldr(Dst.Q(), MemReg, Offset);
  }
}
#undef DEF_OP
} // namespace FEXCore::CPU

View File

@ -694,6 +694,18 @@
"_Offset % RegisterSize == 0",
"RegisterSize == FEXCore::IR::OpSize::i128Bit"
]
},
"FPR = VLoadNonTemporal u8:#RegisterSize, GPR:$Addr, i8:$Offset": {
"Desc": ["Does a non-temporal memory load of a vector.",
"Matches arm64 SVE ldnt1b semantics.",
"Specifically weak-memory model ordered to match x86 non-temporal loads."
],
"HasSideEffects": true,
"DestSize": "RegisterSize",
"EmitValidation": [
"_Offset % RegisterSize == 0",
"RegisterSize == FEXCore::IR::OpSize::i128Bit || RegisterSize == FEXCore::IR::OpSize::i256Bit"
]
}
},
"Atomic": {