IR: Adds support for ARM's FCMA FCADD instruction

This commit is contained in:
Ryan Houdek 2023-08-24 14:18:33 -07:00
parent c5d147322f
commit ba01eac467
8 changed files with 101 additions and 0 deletions

View File

@ -65,6 +65,17 @@
} \
break; \
}
// Emits a `case size:` label (so it must be expanded inside a switch) that
// walks the vectors two `type` elements at a time and calls
//   func(&Dst[i], &Src1[i], &Src2[i])
// for each pair, where `func` is expected to access indices 0 and 1 of the
// pointers it receives.
//
// The expansion site must provide `Tmp` (destination storage usable with
// std::data), `Src1`, `Src2` (source byte pointers) and `Elements` (number of
// `type` elements to process).
//
// NOTE: only block comments are used inside the macro body; a line comment
// would swallow the continued lines that follow it.
#define DO_VECTOR_FCADD_PAIR_OP(size, type, func) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type*>(std::data(Tmp)); \
    auto *Src1_d = reinterpret_cast<const type*>(Src1); \
    auto *Src2_d = reinterpret_cast<const type*>(Src2); \
    /* Stop before a trailing unpaired element so that func never */ \
    /* touches index i + 1 beyond the last element.               */ \
    for (uint8_t i = 0; i + 1 < Elements; i += 2) { \
      func(&Dst_d[i], &Src1_d[i], &Src2_d[i]); \
    } \
    break; \
  }
#define DO_VECTOR_SCALAR_OP(size, type, func) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(std::data(Tmp)); \

View File

@ -286,6 +286,7 @@ constexpr OpHandlerArray InterpreterOpHandlers = [] {
REGISTER_OP(VREV64, VRev64);
REGISTER_OP(VPCMPESTRX, VPCMPESTRX);
REGISTER_OP(VPCMPISTRX, VPCMPISTRX);
REGISTER_OP(VFCADD, VFCADD);
// Encryption ops
REGISTER_OP(VAESIMC, AESImc);

View File

@ -314,6 +314,7 @@ namespace FEXCore::CPU {
DEF_OP(VRev64);
DEF_OP(VPCMPESTRX);
DEF_OP(VPCMPISTRX);
DEF_OP(VFCADD);
///< Encryption ops
DEF_OP(AESImc);

View File

@ -2561,6 +2561,44 @@ DEF_OP(VPCMPISTRX) {
memcpy(GDP, &Result, sizeof(Result));
}
DEF_OP(VFCADD) {
  // Interpreter handler for the VFCADD IR op. For every adjacent pair of
  // elements it adds a rotated copy of the Vector2 pair to the Vector1 pair
  // and writes the OpSize-byte result to GDP.
  const auto Op = IROp->C<IR::IROp_VFCADD>();
  const uint8_t OpSize = IROp->Size;

  const auto *Src1 = GetSrc<uint8_t*>(Data->SSAData, Op->Vector1);
  const auto *Src2 = GetSrc<uint8_t*>(Data->SSAData, Op->Vector2);

  // Only rotations of 90 and 270 are accepted.
  const auto Rotate = Op->Rotate;
  LOGMAN_THROW_A_FMT(Rotate == 90 || Rotate == 270, "Invalid rotate!");

  // Tmp, Src1, Src2 and Elements are referenced by name from inside
  // DO_VECTOR_FCADD_PAIR_OP, so they must keep these exact names.
  TempVectorDataArray Tmp;
  const uint8_t ElementSize = Op->Header.ElementSize;
  const uint8_t Elements = OpSize / ElementSize;

  const bool IsRotate90 = Rotate == 90;
  const auto AddRotatedPair = [IsRotate90](auto *Out, const auto *Lhs, const auto *Rhs) {
    // Rotate 90 negates the second Rhs element (added into the first lane);
    // the other rotation negates the first Rhs element (added into the
    // second lane) instead.
    const auto FirstAddend = IsRotate90 ? -Rhs[1] : Rhs[1];
    const auto SecondAddend = IsRotate90 ? Rhs[0] : -Rhs[0];

    Out[0] = Lhs[0] + FirstAddend;
    Out[1] = Lhs[1] + SecondAddend;
  };

  switch (ElementSize) {
    // Half-precision is not handled here; an element size of 2 falls
    // through to the default case.
    //DO_VECTOR_FCADD_PAIR_OP(2, float16_t, AddRotatedPair)
    DO_VECTOR_FCADD_PAIR_OP(4, float, AddRotatedPair)
    DO_VECTOR_FCADD_PAIR_OP(8, double, AddRotatedPair)
    default:
      LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
      break;
  }

  memcpy(GDP, Tmp.data(), OpSize);
}
#undef DEF_OP
} // namespace FEXCore::CPU

View File

@ -1104,6 +1104,7 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry,
REGISTER_OP(VTBL1, VTBL1);
REGISTER_OP(VREV32, VRev32);
REGISTER_OP(VREV64, VRev64);
REGISTER_OP(VFCADD, VFCADD);
#undef REGISTER_OP
default:

View File

@ -470,6 +470,7 @@ private:
DEF_OP(VTBL1);
DEF_OP(VRev32);
DEF_OP(VRev64);
DEF_OP(VFCADD);
///< Encryption ops
DEF_OP(AESImc);

View File

@ -3825,6 +3825,50 @@ DEF_OP(VRev64) {
}
}
DEF_OP(VFCADD) {
  // Arm64 JIT handler for the VFCADD IR op: emits an FCADD of Vector1 and
  // Vector2 with the requested rotation, using the SVE form for 256-bit
  // operations when the host supports it and the AdvSIMD form otherwise.
  const auto Op = IROp->C<IR::IROp_VFCADD>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1.ID());
  const auto Vector2 = GetVReg(Op->Vector2.ID());

  LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size");
  // Only rotations of 90 and 270 are accepted.
  LOGMAN_THROW_A_FMT(Op->Rotate == 90 || Op->Rotate == 270, "Invalid Rotate");

  // Map the element size in bytes onto the emitter's lane-size enum.
  const auto SubRegSize =
    ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit :
    ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i64Bit;

  const auto Rotate =
    Op->Rotate == 90 ? ARMEmitter::Rotation::ROTATE_90 : ARMEmitter::Rotation::ROTATE_270;

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Mask = PRED_TMP_32B.Merging();

    if (Dst == Vector1) {
      // Trivial case where we already have first vector in the destination
      // register. We can just do the operation in place.
      fcadd(SubRegSize, Dst.Z(), Mask, Vector1.Z(), Vector2.Z(), Rotate);
    }
    else {
      // SVE FCADD is a destructive operation, so we need
      // a temporary for performing operations.
      movprfx(VTMP1.Z(), Vector1.Z());
      fcadd(SubRegSize, VTMP1.Z(), Mask, VTMP1.Z(), Vector2.Z(), Rotate);
      mov(Dst.Z(), VTMP1.Z());
    }
  } else {
    // The AdvSIMD form takes a separate destination, so no temporary is
    // needed. 8-byte operations use the D view, everything else the Q view.
    if (OpSize == 8) {
      fcadd(SubRegSize, Dst.D(), Vector1.D(), Vector2.D(), Rotate);
    }
    else {
      fcadd(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q(), Rotate);
    }
  }
}
#undef DEF_OP
}

View File

@ -1558,6 +1558,10 @@
"course of creating the intermediate result"
],
"DestSize": "4"
},
"FPR = VFCADD u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, u16:$Rotate": {
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
}
},
"Conv": {