mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-02-14 19:47:59 +00:00
IR: Adds support for ARM's FCMA FCADD instruction
This commit is contained in:
parent
c5d147322f
commit
ba01eac467
@ -65,6 +65,17 @@
|
||||
} \
|
||||
break; \
|
||||
}
|
||||
// Emits one `case size:` of a switch over the element size for pairwise ops.
//
// Expects `Src1`, `Src2`, `Tmp` and `Elements` to be in scope at the expansion
// site. Both sources and the temporary are viewed as arrays of `type`, and
// `func(dst, src1, src2)` is invoked once per pair of adjacent elements with
// pointers to the first element of that pair, so `func` may touch indices
// [0] and [1] of each pointer.
#define DO_VECTOR_FCADD_PAIR_OP(size, type, func)                 \
  case size: {                                                    \
    const auto *Src1_d = reinterpret_cast<const type*>(Src1);     \
    const auto *Src2_d = reinterpret_cast<const type*>(Src2);     \
    auto *Dst_d = reinterpret_cast<type*>(std::data(Tmp));        \
    for (uint8_t i = 0; i < Elements; i += 2) {                   \
      func(Dst_d + i, Src1_d + i, Src2_d + i);                    \
    }                                                             \
    break;                                                        \
  }
|
||||
|
||||
#define DO_VECTOR_SCALAR_OP(size, type, func) \
|
||||
case size: { \
|
||||
auto *Dst_d = reinterpret_cast<type*>(std::data(Tmp)); \
|
||||
|
@ -286,6 +286,7 @@ constexpr OpHandlerArray InterpreterOpHandlers = [] {
|
||||
REGISTER_OP(VREV64, VRev64);
|
||||
REGISTER_OP(VPCMPESTRX, VPCMPESTRX);
|
||||
REGISTER_OP(VPCMPISTRX, VPCMPISTRX);
|
||||
REGISTER_OP(VFCADD, VFCADD);
|
||||
|
||||
// Encryption ops
|
||||
REGISTER_OP(VAESIMC, AESImc);
|
||||
|
@ -314,6 +314,7 @@ namespace FEXCore::CPU {
|
||||
DEF_OP(VRev64);
|
||||
DEF_OP(VPCMPESTRX);
|
||||
DEF_OP(VPCMPISTRX);
|
||||
DEF_OP(VFCADD);
|
||||
|
||||
///< Encryption ops
|
||||
DEF_OP(AESImc);
|
||||
|
@ -2561,6 +2561,44 @@ DEF_OP(VPCMPISTRX) {
|
||||
memcpy(GDP, &Result, sizeof(Result));
|
||||
}
|
||||
|
||||
// Interpreter implementation of the VFCADD IR op.
//
// Both source vectors are processed as pairs of adjacent elements. For every
// pair the second operand's two elements are swapped and one of them is
// negated before being added to the first operand:
//   Rotate ==  90: dst[0] = src1[0] - src2[1];  dst[1] = src1[1] + src2[0]
//   Rotate == 270: dst[0] = src1[0] + src2[1];  dst[1] = src1[1] - src2[0]
// Only rotations of 90 and 270 are accepted. The result is assembled in a
// temporary and then copied to the op's destination.
DEF_OP(VFCADD) {
  const auto Op = IROp->C<IR::IROp_VFCADD>();
  const uint8_t OpSize = IROp->Size;

  const auto *Src1 = GetSrc<uint8_t*>(Data->SSAData, Op->Vector1);
  const auto *Src2 = GetSrc<uint8_t*>(Data->SSAData, Op->Vector2);
  const auto Rotate = Op->Rotate;
  LOGMAN_THROW_A_FMT(Rotate == 90 || Rotate == 270, "Invalid rotate!");

  // Value-initialised so the final memcpy never publishes indeterminate bytes
  // when the element size is not handled by the switch below.
  TempVectorDataArray Tmp{};

  const uint8_t ElementSize = Op->Header.ElementSize;
  const uint8_t Elements = OpSize / ElementSize;

  // Operates on one pair of elements. The negation is applied to the swapped
  // second-operand value first, then added, mirroring the formulas above.
  const auto Func = [Rotate](auto dst, auto src1, auto src2) {
    auto AddToFirst = src2[1];
    auto AddToSecond = src2[0];
    if (Rotate == 90) {
      AddToFirst = -AddToFirst;
    }
    else {
      AddToSecond = -AddToSecond;
    }
    dst[0] = src1[0] + AddToFirst;
    dst[1] = src1[1] + AddToSecond;
  };

  switch (ElementSize) {
    // 16-bit elements are currently disabled, so they fall through to default.
    //DO_VECTOR_FCADD_PAIR_OP(2, float16_t, Func)
    DO_VECTOR_FCADD_PAIR_OP(4, float, Func)
    DO_VECTOR_FCADD_PAIR_OP(8, double, Func)
    default:
      LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
      break;
  }
  memcpy(GDP, Tmp.data(), OpSize);
}
|
||||
|
||||
#undef DEF_OP
|
||||
|
||||
} // namespace FEXCore::CPU
|
||||
|
@ -1104,6 +1104,7 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry,
|
||||
REGISTER_OP(VTBL1, VTBL1);
|
||||
REGISTER_OP(VREV32, VRev32);
|
||||
REGISTER_OP(VREV64, VRev64);
|
||||
REGISTER_OP(VFCADD, VFCADD);
|
||||
#undef REGISTER_OP
|
||||
|
||||
default:
|
||||
|
@ -470,6 +470,7 @@ private:
|
||||
DEF_OP(VTBL1);
|
||||
DEF_OP(VRev32);
|
||||
DEF_OP(VRev64);
|
||||
DEF_OP(VFCADD);
|
||||
|
||||
///< Encryption ops
|
||||
DEF_OP(AESImc);
|
||||
|
@ -3825,6 +3825,50 @@ DEF_OP(VRev64) {
|
||||
}
|
||||
}
|
||||
|
||||
// Arm64 JIT lowering of the VFCADD IR op to the host FCADD instruction.
//
// The element size (2, 4 or 8 bytes) selects the sub-register size and the
// rotation (90 or 270) selects the emitter rotation enum. 256-bit operations
// on hosts with 256-bit SVE use the predicated SVE form; everything else uses
// the vector-register form on either the 64-bit (D) or 128-bit (Q) view.
DEF_OP(VFCADD) {
  const auto Op = IROp->C<IR::IROp_VFCADD>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1.ID());
  const auto Vector2 = GetVReg(Op->Vector2.ID());

  LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size");
  // Same wording as the interpreter's check for this op.
  LOGMAN_THROW_A_FMT(Op->Rotate == 90 || Op->Rotate == 270, "Invalid rotate!");

  const auto SubRegSize =
    ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit :
    ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i64Bit;
  const auto Rotate =
    Op->Rotate == 90 ? ARMEmitter::Rotation::ROTATE_90 : ARMEmitter::Rotation::ROTATE_270;

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Mask = PRED_TMP_32B.Merging();

    if (Dst == Vector1) {
      // Trivial case where we already have first vector in the destination
      // register. We can just do the operation in place.
      fcadd(SubRegSize, Dst.Z(), Mask, Vector1.Z(), Vector2.Z(), Rotate);
    }
    else {
      // SVE FCADD is a destructive operation, so we need
      // a temporary for performing operations: copy the first operand into
      // the temporary, operate on it in place, then move it to the destination.
      movprfx(VTMP1.Z(), Vector1.Z());
      fcadd(SubRegSize, VTMP1.Z(), Mask, VTMP1.Z(), Vector2.Z(), Rotate);
      mov(Dst.Z(), VTMP1.Z());
    }
  } else {
    if (OpSize == 8) {
      // 64-bit operation: use the D view of the registers.
      fcadd(SubRegSize, Dst.D(), Vector1.D(), Vector2.D(), Rotate);
    }
    else {
      // Otherwise operate on the full 128-bit Q view.
      fcadd(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q(), Rotate);
    }
  }
}
|
||||
|
||||
#undef DEF_OP
|
||||
}
|
||||
|
||||
|
@ -1558,6 +1558,10 @@
|
||||
"course of creating the intermediate result"
|
||||
],
|
||||
"DestSize": "4"
|
||||
},
|
||||
"FPR = VFCADD u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, u16:$Rotate": {
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / ElementSize"
|
||||
}
|
||||
},
|
||||
"Conv": {
|
||||
|
Loading…
x
Reference in New Issue
Block a user