mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-01-22 14:26:51 +00:00
Merge pull request #3882 from Sonicadvance1/scalar_afp_fma
AVX128: Implement support for scalar FMA with AFP
This commit is contained in:
commit
9201ac5a6b
@ -345,6 +345,10 @@ private:
|
||||
uint32_t SpillSlots {};
|
||||
using OpType = void (Arm64JITCore::*)(const IR::IROp_Header* IROp, IR::NodeID Node);
|
||||
|
||||
// Callback used to emit a single scalar FMA instruction. It is invoked with the destination register
// followed by the three source registers, in the order (Dst, Src1, Src2, Src3).
using ScalarFMAOpCaller =
|
||||
std::function<void(ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2, ARMEmitter::VRegister Src3)>;
|
||||
// Shared helper for the scalar FMA handlers: takes the op/element sizes, the emit callback and the
// destination, two multiplicand and addend registers.
// NOTE(review): declaration only — the register-placement rules live in the definition; confirm there.
void VFScalarFMAOperation(uint8_t OpSize, uint8_t ElementSize, ScalarFMAOpCaller ScalarEmit, ARMEmitter::VRegister Dst,
|
||||
ARMEmitter::VRegister Vector1, ARMEmitter::VRegister Vector2, ARMEmitter::VRegister Addend);
|
||||
using ScalarBinaryOpCaller = std::function<void(ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2)>;
|
||||
void VFScalarOperation(uint8_t OpSize, uint8_t ElementSize, bool ZeroUpperBits, ScalarBinaryOpCaller ScalarEmit,
|
||||
ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1, ARMEmitter::VRegister Vector2);
|
||||
|
@ -188,6 +188,30 @@ namespace FEXCore::CPU {
|
||||
VFScalarOperation(IROp->Size, ElementSize, Op->ZeroUpperBits, ScalarEmit, Dst, Vector1, Vector2); \
|
||||
}
|
||||
|
||||
// Generates the JIT handler for a scalar fused-multiply-add "insert" IR op.
//   FEXOp - IR op name; operands are read through IR::IROp_<FEXOp> (Vector1, Vector2, Addend).
//   ARMOp - emitter method called as ARMOp(Dst, Src1, Src2, Src3) on scalar register views.
// The generated handler only picks the H/S/D register view that matches the op's element size
// (2, 4 or 8 bytes) and delegates register placement to VFScalarFMAOperation.
#define DEF_FMAOP_SCALAR_INSERT(FEXOp, ARMOp) \
  DEF_OP(FEXOp) { \
    const auto Op = IROp->C<IR::IROp_##FEXOp>(); \
    const auto ElementSize = Op->Header.ElementSize; \
    \
    auto EmitScalarFMA = [this, ElementSize](ARMEmitter::VRegister Out, ARMEmitter::VRegister MulA, ARMEmitter::VRegister MulB, \
                                             ARMEmitter::VRegister Acc) { \
      switch (ElementSize) { \
      case 2: \
        ARMOp(Out.H(), MulA.H(), MulB.H(), Acc.H()); \
        break; \
      case 4: \
        ARMOp(Out.S(), MulA.S(), MulB.S(), Acc.S()); \
        break; \
      case 8: \
        ARMOp(Out.D(), MulA.D(), MulB.D(), Acc.D()); \
        break; \
      default: \
        /* Any other element size emits nothing; the helper below asserts on the size. */ \
        break; \
      } \
    }; \
    \
    const auto DstReg = GetVReg(Node); \
    const auto Mul1Reg = GetVReg(Op->Vector1.ID()); \
    const auto Mul2Reg = GetVReg(Op->Vector2.ID()); \
    const auto AddendReg = GetVReg(Op->Addend.ID()); \
    \
    VFScalarFMAOperation(IROp->Size, ElementSize, EmitScalarFMA, DstReg, Mul1Reg, Mul2Reg, AddendReg); \
  }
|
||||
|
||||
DEF_UNOP(VAbs, abs, true)
|
||||
DEF_UNOP(VPopcount, cnt, true)
|
||||
DEF_UNOP(VNeg, neg, false)
|
||||
@ -224,6 +248,35 @@ DEF_FBINOP_SCALAR_INSERT(VFSubScalarInsert, fsub)
|
||||
DEF_FBINOP_SCALAR_INSERT(VFMulScalarInsert, fmul)
|
||||
DEF_FBINOP_SCALAR_INSERT(VFDivScalarInsert, fdiv)
|
||||
|
||||
// Scalar FMA handlers. Each IR op is paired with the Arm scalar instruction used to emit it; the IR
// naming follows the x86 operation, so the Arm mnemonic differs for every variant except the first.
// VFMLAScalarInsert:  (Vector1 * Vector2) + Addend
DEF_FMAOP_SCALAR_INSERT(VFMLAScalarInsert, fmadd)
|
||||
// VFMLSScalarInsert:  (Vector1 * Vector2) - Addend
DEF_FMAOP_SCALAR_INSERT(VFMLSScalarInsert, fnmsub)
|
||||
// VFNMLAScalarInsert: (-Vector1 * Vector2) + Addend
DEF_FMAOP_SCALAR_INSERT(VFNMLAScalarInsert, fmsub)
|
||||
// VFNMLSScalarInsert: (-Vector1 * Vector2) - Addend
DEF_FMAOP_SCALAR_INSERT(VFNMLSScalarInsert, fnmadd)
|
||||
|
||||
// Emits a scalar fused multiply-add so that the scalar result ends up in element 0 of Dst.
//
//   OpSize      - asserted to equal Core::CPUState::XMM_SSE_REG_SIZE; the 256-bit form is unsupported.
//   ElementSize - element width in bytes; asserted to be 2, 4 or 8.
//   ScalarEmit  - callback that emits the actual instruction as (Dst, Src1, Src2, Src3).
//   Dst         - register receiving the result.
//   Vector1/2   - multiplicand registers, forwarded as Src1/Src2.
//   Addend      - addend register, forwarded as Src3.
//
// Two strategies are used:
//   * Host has AFP and Dst aliases the addend: emit directly into Dst.
//   * Otherwise: emit into VTMP1 and insert element 0 of the temporary into Dst.
void Arm64JITCore::VFScalarFMAOperation(uint8_t OpSize, uint8_t ElementSize, ScalarFMAOpCaller ScalarEmit, ARMEmitter::VRegister Dst,
                                        ARMEmitter::VRegister Vector1, ARMEmitter::VRegister Vector2, ARMEmitter::VRegister Addend) {
  LOGMAN_THROW_A_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, "256-bit unsupported", __func__);

  LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size");
  const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit :
                                                       ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit :
                                                                          ARMEmitter::SubRegSize::i64Bit);
  if (Dst != Vector1 && Dst != Vector2 && Dst != Addend && HostSupportsAFP) {
    // The destination doesn't overlap any incoming register, so move the addend into the destination first
    // and use the destination as the addend from here on. The original addend register must not be
    // written: retargeting Dst to Addend instead would clobber a source operand and leave the real
    // destination holding only a copy of the addend.
    mov(Dst.Q(), Addend.Q());
    Addend = Dst;
  }

  if (HostSupportsAFP && Dst == Addend) {
    ///< Exactly matches ARM scalar FMA semantics
    // If the host CPU supports AFP then scalar does an insert without modifying upper bits.
    ScalarEmit(Dst, Vector1, Vector2, Addend);
  } else {
    // No overlap between addend and destination or host doesn't support AFP, need to emit in to a temporary then insert.
    ScalarEmit(VTMP1, Vector1, Vector2, Addend);
    ins(SubRegSize.Vector, Dst.Q(), 0, VTMP1.Q(), 0);
  }
}
|
||||
|
||||
// VFScalarOperation performs the operation described through ScalarEmit between Vector1 and Vector2,
|
||||
// storing it into Dst. This is a scalar operation, so only the lowest element of each vector is used for the operation.
|
||||
|
@ -1160,7 +1160,8 @@ public:
|
||||
|
||||
void AVX128_VPCLMULQDQ(OpcodeArgs);
|
||||
|
||||
void AVX128_VFMAImpl(OpcodeArgs, IROps IROp, bool Scalar, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx);
|
||||
void AVX128_VFMAImpl(OpcodeArgs, IROps IROp, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx);
|
||||
void AVX128_VFMAScalarImpl(OpcodeArgs, IROps IROp, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx);
|
||||
void AVX128_VFMAddSubImpl(OpcodeArgs, bool AddSub, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx);
|
||||
|
||||
RefPair AVX128_VPGatherQPSImpl(Ref Dest, Ref Mask, RefVSIB VSIB);
|
||||
|
@ -337,32 +337,32 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
|
||||
{OPD(2, 0b01, 0x96), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAddSubImpl, true, 1, 3, 2>}, // VFMADDSUB
|
||||
{OPD(2, 0b01, 0x97), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAddSubImpl, false, 1, 3, 2>}, // VFMSUBADD
|
||||
|
||||
{OPD(2, 0b01, 0x98), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLA, false, 1, 3, 2>}, // VFMADD
|
||||
{OPD(2, 0b01, 0x99), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLA, true, 1, 3, 2>}, // VFMADD
|
||||
{OPD(2, 0b01, 0x9A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLS, false, 1, 3, 2>}, // VFMSUB
|
||||
{OPD(2, 0b01, 0x9B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLS, true, 1, 3, 2>}, // VFMSUB
|
||||
{OPD(2, 0b01, 0x9C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLA, false, 1, 3, 2>}, // VFNMADD
|
||||
{OPD(2, 0b01, 0x9D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLA, true, 1, 3, 2>}, // VFNMADD
|
||||
{OPD(2, 0b01, 0x9E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLS, false, 1, 3, 2>}, // VFNMSUB
|
||||
{OPD(2, 0b01, 0x9F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLS, true, 1, 3, 2>}, // VFNMSUB
|
||||
{OPD(2, 0b01, 0x98), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLA, 1, 3, 2>}, // VFMADD
|
||||
{OPD(2, 0b01, 0x99), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFMLASCALARINSERT, 1, 3, 2>}, // VFMADD
|
||||
{OPD(2, 0b01, 0x9A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLS, 1, 3, 2>}, // VFMSUB
|
||||
{OPD(2, 0b01, 0x9B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFMLSSCALARINSERT, 1, 3, 2>}, // VFMSUB
|
||||
{OPD(2, 0b01, 0x9C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLA, 1, 3, 2>}, // VFNMADD
|
||||
{OPD(2, 0b01, 0x9D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFNMLASCALARINSERT, 1, 3, 2>}, // VFNMADD
|
||||
{OPD(2, 0b01, 0x9E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLS, 1, 3, 2>}, // VFNMSUB
|
||||
{OPD(2, 0b01, 0x9F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFNMLSSCALARINSERT, 1, 3, 2>}, // VFNMSUB
|
||||
|
||||
{OPD(2, 0b01, 0xA8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLA, false, 2, 1, 3>}, // VFMADD
|
||||
{OPD(2, 0b01, 0xA9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLA, true, 2, 1, 3>}, // VFMADD
|
||||
{OPD(2, 0b01, 0xAA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLS, false, 2, 1, 3>}, // VFMSUB
|
||||
{OPD(2, 0b01, 0xAB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLS, true, 2, 1, 3>}, // VFMSUB
|
||||
{OPD(2, 0b01, 0xAC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLA, false, 2, 1, 3>}, // VFNMADD
|
||||
{OPD(2, 0b01, 0xAD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLA, true, 2, 1, 3>}, // VFNMADD
|
||||
{OPD(2, 0b01, 0xAE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLS, false, 2, 1, 3>}, // VFNMSUB
|
||||
{OPD(2, 0b01, 0xAF), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLS, true, 2, 1, 3>}, // VFNMSUB
|
||||
{OPD(2, 0b01, 0xA8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLA, 2, 1, 3>}, // VFMADD
|
||||
{OPD(2, 0b01, 0xA9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFMLASCALARINSERT, 2, 1, 3>}, // VFMADD
|
||||
{OPD(2, 0b01, 0xAA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLS, 2, 1, 3>}, // VFMSUB
|
||||
{OPD(2, 0b01, 0xAB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFMLSSCALARINSERT, 2, 1, 3>}, // VFMSUB
|
||||
{OPD(2, 0b01, 0xAC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLA, 2, 1, 3>}, // VFNMADD
|
||||
{OPD(2, 0b01, 0xAD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFNMLASCALARINSERT, 2, 1, 3>}, // VFNMADD
|
||||
{OPD(2, 0b01, 0xAE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLS, 2, 1, 3>}, // VFNMSUB
|
||||
{OPD(2, 0b01, 0xAF), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFNMLSSCALARINSERT, 2, 1, 3>}, // VFNMSUB
|
||||
|
||||
{OPD(2, 0b01, 0xB8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLA, false, 2, 3, 1>}, // VFMADD
|
||||
{OPD(2, 0b01, 0xB9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLA, true, 2, 3, 1>}, // VFMADD
|
||||
{OPD(2, 0b01, 0xBA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLS, false, 2, 3, 1>}, // VFMSUB
|
||||
{OPD(2, 0b01, 0xBB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLS, true, 2, 3, 1>}, // VFMSUB
|
||||
{OPD(2, 0b01, 0xBC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLA, false, 2, 3, 1>}, // VFNMADD
|
||||
{OPD(2, 0b01, 0xBD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLA, true, 2, 3, 1>}, // VFNMADD
|
||||
{OPD(2, 0b01, 0xBE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLS, false, 2, 3, 1>}, // VFNMSUB
|
||||
{OPD(2, 0b01, 0xBF), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLS, true, 2, 3, 1>}, // VFNMSUB
|
||||
{OPD(2, 0b01, 0xB8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLA, 2, 3, 1>}, // VFMADD
|
||||
{OPD(2, 0b01, 0xB9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFMLASCALARINSERT, 2, 3, 1>}, // VFMADD
|
||||
{OPD(2, 0b01, 0xBA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLS, 2, 3, 1>}, // VFMSUB
|
||||
{OPD(2, 0b01, 0xBB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFMLSSCALARINSERT, 2, 3, 1>}, // VFMSUB
|
||||
{OPD(2, 0b01, 0xBC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLA, 2, 3, 1>}, // VFNMADD
|
||||
{OPD(2, 0b01, 0xBD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFNMLASCALARINSERT, 2, 3, 1>}, // VFNMADD
|
||||
{OPD(2, 0b01, 0xBE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLS, 2, 3, 1>}, // VFNMSUB
|
||||
{OPD(2, 0b01, 0xBF), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFNMLSSCALARINSERT, 2, 3, 1>}, // VFNMSUB
|
||||
|
||||
{OPD(2, 0b01, 0xA6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAddSubImpl, true, 2, 1, 3>}, // VFMADDSUB
|
||||
{OPD(2, 0b01, 0xA7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAddSubImpl, false, 2, 1, 3>}, // VFMSUBADD
|
||||
@ -2460,42 +2460,50 @@ void OpDispatchBuilder::AVX128_VPCLMULQDQ(OpcodeArgs) {
|
||||
// As shown, only the 231-suffixed instructions match AArch64 behaviour.
|
||||
// FEX will insert moves to transpose the vectors to match AArch64 behaviour for 132 and 213 variants.
|
||||
|
||||
// Lowers an AVX FMA instruction in 128-bit AVX mode: loads Dest/Src[0]/Src[1] as register pairs, issues
// the IR FMA on the low halves, then either zeroes the high half (128-bit op) or issues a second FMA on
// the high halves, and stores the pair to Op->Dest.
// Src1Idx/Src2Idx/AddendIdx are 1-based selectors into {Dest, Src[0], Src[1]}.
// NOTE(review): this span is a rendered diff hunk whose +/- markers were lost. It appears to interleave
// pre-change lines (the `bool Scalar` signature, RegisterSize, the multi-line Sources initialiser, the
// `if (Scalar)` insert, the RegisterSize-based _VFMLA calls) with their post-change replacements —
// confirm against the repository which lines are current before relying on this text.
void OpDispatchBuilder::AVX128_VFMAImpl(OpcodeArgs, IROps IROp, bool Scalar, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx) {
|
||||
void OpDispatchBuilder::AVX128_VFMAImpl(OpcodeArgs, IROps IROp, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx) {
|
||||
const auto Size = GetDstSize(Op);
|
||||
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
const OpSize ElementSize = Op->Flags & X86Tables::DecodeFlags::FLAG_OPTION_AVX_W ? OpSize::i64Bit : OpSize::i32Bit;
|
||||
|
||||
const auto RegisterSize = Scalar ? ElementSize : OpSize::i128Bit;
|
||||
|
||||
auto Dest = AVX128_LoadSource_WithOpSize(Op, Op->Dest, Op->Flags, !Is128Bit);
|
||||
auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
|
||||
auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !Is128Bit);
|
||||
|
||||
RefPair Sources[3] = {
|
||||
Dest,
|
||||
Src1,
|
||||
Src2,
|
||||
};
|
||||
RefPair Sources[3] = {Dest, Src1, Src2};
|
||||
|
||||
RefPair Result {};
|
||||
DeriveOp(Result_Low, IROp, _VFMLA(RegisterSize, ElementSize, Sources[Src1Idx - 1].Low, Sources[Src2Idx - 1].Low, Sources[AddendIdx - 1].Low));
|
||||
DeriveOp(Result_Low, IROp, _VFMLA(OpSize::i128Bit, ElementSize, Sources[Src1Idx - 1].Low, Sources[Src2Idx - 1].Low, Sources[AddendIdx - 1].Low));
|
||||
Result.Low = Result_Low;
|
||||
if (Is128Bit) {
|
||||
Result.High = LoadZeroVector(OpSize::i128Bit);
|
||||
if (Scalar) {
|
||||
// Special case, scalar inserts in to the low bits of the destination.
|
||||
///< TODO: This can be optimized with AFP.NEP.
|
||||
Result.Low = _VInsElement(OpSize::i128Bit, ElementSize, 0, 0, Dest.Low, Result.Low);
|
||||
}
|
||||
} else {
|
||||
DeriveOp(Result_High, IROp,
|
||||
_VFMLA(RegisterSize, ElementSize, Sources[Src1Idx - 1].High, Sources[Src2Idx - 1].High, Sources[AddendIdx - 1].High));
|
||||
_VFMLA(OpSize::i128Bit, ElementSize, Sources[Src1Idx - 1].High, Sources[Src2Idx - 1].High, Sources[AddendIdx - 1].High));
|
||||
Result.High = Result_High;
|
||||
|
||||
}
|
||||
AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
|
||||
}
|
||||
|
||||
// Lowers the scalar AVX FMA forms in 128-bit AVX mode through a scalar-insert IR op.
//
//   IROp      - the concrete *ScalarInsert opcode stamped onto the generated IR node.
//   Src1Idx   - 1-based selector of the first multiplicand among {Dest, Src[0], Src[1]}.
//   Src2Idx   - 1-based selector of the second multiplicand among the same three operands.
//   AddendIdx - 1-based selector of the addend among the same three operands.
//
// Only the low halves of the loaded operands take part; the result is passed through AVX128_Zext
// (presumably zero-extending into the upper half — confirm) before being stored to Op->Dest.
void OpDispatchBuilder::AVX128_VFMAScalarImpl(OpcodeArgs, IROps IROp, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx) {
  const auto Is128Bit = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
  LOGMAN_THROW_A_FMT(Is128Bit, "This can't be 256-bit");

  // The AVX W option flag selects 64-bit elements, otherwise 32-bit elements are used.
  const OpSize ElementSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_OPTION_AVX_W) ? OpSize::i64Bit : OpSize::i32Bit;

  // Operand table in x86 operand order; the braced initialiser evaluates left to right, so the loads
  // are issued as destination, first source, second source.
  RefPair Operands[3] = {
    AVX128_LoadSource_WithOpSize(Op, Op->Dest, Op->Flags, !Is128Bit),
    AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit),
    AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !Is128Bit),
  };

  Ref Multiplicand1 = Operands[Src1Idx - 1].Low;
  Ref Multiplicand2 = Operands[Src2Idx - 1].Low;
  Ref AddendLow = Operands[AddendIdx - 1].Low;

  // Build the node as VFMLAScalarInsert, then let DeriveOp retag it with the requested opcode.
  DeriveOp(ScalarResult, IROp, _VFMLAScalarInsert(OpSize::i128Bit, ElementSize, Multiplicand1, Multiplicand2, AddendLow));
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, AVX128_Zext(ScalarResult));
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_VFMAddSubImpl(OpcodeArgs, bool AddSub, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx) {
|
||||
const auto Size = GetDstSize(Op);
|
||||
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
@ -1716,6 +1716,42 @@
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / ElementSize"
|
||||
},
|
||||
"FPR = VFMLAScalarInsert u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": {
|
||||
"Desc": [
|
||||
"Dest = (Vector1 * Vector2) + Addend",
|
||||
"This explicitly matches x86 FMA semantics because ARM semantics are mind-bending."
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / ElementSize",
|
||||
"TiedSource": 2
|
||||
},
|
||||
"FPR = VFMLSScalarInsert u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": {
|
||||
"Desc": [
|
||||
"Dest = (Vector1 * Vector2) - Addend",
|
||||
"This explicitly matches x86 FMA semantics because ARM semantics are mind-bending."
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / ElementSize",
|
||||
"TiedSource": 2
|
||||
},
|
||||
"FPR = VFNMLAScalarInsert u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": {
|
||||
"Desc": [
|
||||
"Dest = (-Vector1 * Vector2) + Addend",
|
||||
"This explicitly matches x86 FMA semantics because ARM semantics are mind-bending."
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / ElementSize",
|
||||
"TiedSource": 2
|
||||
},
|
||||
"FPR = VFNMLSScalarInsert u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": {
|
||||
"Desc": [
|
||||
"Dest = (-Vector1 * Vector2) - Addend",
|
||||
"This explicitly matches x86 FMA semantics because ARM semantics are mind-bending."
|
||||
],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / ElementSize",
|
||||
"TiedSource": 2
|
||||
}
|
||||
},
|
||||
"Vector": {
|
||||
|
@ -4742,10 +4742,10 @@
|
||||
"Map 2 0b01 0x99 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd s2, s16, s18, s17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmadd s0, s16, s18, s17",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmadd132sd xmm0, xmm1, xmm2": {
|
||||
@ -4754,10 +4754,10 @@
|
||||
"Map 2 0b01 0x99 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd d2, d16, d18, d17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmadd d0, d16, d18, d17",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub132ps xmm0, xmm1, xmm2": {
|
||||
@ -4826,10 +4826,10 @@
|
||||
"Map 2 0b01 0x9b 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub s2, s16, s18, s17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmsub s0, s16, s18, s17",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub132sd xmm0, xmm1, xmm2": {
|
||||
@ -4838,10 +4838,10 @@
|
||||
"Map 2 0b01 0x9b 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub d2, d16, d18, d17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmsub d0, d16, d18, d17",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd132ps xmm0, xmm1, xmm2": {
|
||||
@ -4908,10 +4908,10 @@
|
||||
"Map 2 0b01 0x9d 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub s2, s16, s18, s17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmsub s0, s16, s18, s17",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd132sd xmm0, xmm1, xmm2": {
|
||||
@ -4920,10 +4920,10 @@
|
||||
"Map 2 0b01 0x9d 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub d2, d16, d18, d17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmsub d0, d16, d18, d17",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub132ps xmm0, xmm1, xmm2": {
|
||||
@ -4992,10 +4992,10 @@
|
||||
"Map 2 0b01 0x9f 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd s2, s16, s18, s17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmadd s0, s16, s18, s17",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub132sd xmm0, xmm1, xmm2": {
|
||||
@ -5004,10 +5004,10 @@
|
||||
"Map 2 0b01 0x9f 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd d2, d16, d18, d17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmadd d0, d16, d18, d17",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmadd213ps xmm0, xmm1, xmm2": {
|
||||
@ -5074,10 +5074,10 @@
|
||||
"Map 2 0b01 0xa9 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd s2, s17, s16, s18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmadd s0, s17, s16, s18",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmadd213sd xmm0, xmm1, xmm2": {
|
||||
@ -5086,10 +5086,10 @@
|
||||
"Map 2 0b01 0xa9 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd d2, d17, d16, d18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmadd d0, d17, d16, d18",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub213ps xmm0, xmm1, xmm2": {
|
||||
@ -5158,10 +5158,10 @@
|
||||
"Map 2 0b01 0xab 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub s2, s17, s16, s18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmsub s0, s17, s16, s18",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub213sd xmm0, xmm1, xmm2": {
|
||||
@ -5170,10 +5170,10 @@
|
||||
"Map 2 0b01 0xab 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub d2, d17, d16, d18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmsub d0, d17, d16, d18",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd213ps xmm0, xmm1, xmm2": {
|
||||
@ -5240,10 +5240,10 @@
|
||||
"Map 2 0b01 0xad 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub s2, s17, s16, s18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmsub s0, s17, s16, s18",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd213sd xmm0, xmm1, xmm2": {
|
||||
@ -5252,10 +5252,10 @@
|
||||
"Map 2 0b01 0xad 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub d2, d17, d16, d18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmsub d0, d17, d16, d18",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub213ps xmm0, xmm1, xmm2": {
|
||||
@ -5324,10 +5324,10 @@
|
||||
"Map 2 0b01 0xaf 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd s2, s17, s16, s18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmadd s0, s17, s16, s18",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub213sd xmm0, xmm1, xmm2": {
|
||||
@ -5336,10 +5336,10 @@
|
||||
"Map 2 0b01 0xaf 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd d2, d17, d16, d18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmadd d0, d17, d16, d18",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmadd231ps xmm0, xmm1, xmm2": {
|
||||
@ -5398,10 +5398,10 @@
|
||||
"Map 2 0b01 0xb9 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd s2, s17, s18, s16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmadd s0, s17, s18, s16",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmadd231sd xmm0, xmm1, xmm2": {
|
||||
@ -5410,10 +5410,10 @@
|
||||
"Map 2 0b01 0xb9 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd d2, d17, d18, d16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmadd d0, d17, d18, d16",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub231ps xmm0, xmm1, xmm2": {
|
||||
@ -5478,10 +5478,10 @@
|
||||
"Map 2 0b01 0xbb 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub s2, s17, s18, s16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmsub s0, s17, s18, s16",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub231sd xmm0, xmm1, xmm2": {
|
||||
@ -5490,10 +5490,10 @@
|
||||
"Map 2 0b01 0xbb 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub d2, d17, d18, d16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmsub d0, d17, d18, d16",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd231ps xmm0, xmm1, xmm2": {
|
||||
@ -5552,10 +5552,10 @@
|
||||
"Map 2 0b01 0xbd 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub s2, s17, s18, s16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmsub s0, s17, s18, s16",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd231sd xmm0, xmm1, xmm2": {
|
||||
@ -5564,10 +5564,10 @@
|
||||
"Map 2 0b01 0xbd 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub d2, d17, d18, d16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmsub d0, d17, d18, d16",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub231ps xmm0, xmm1, xmm2": {
|
||||
@ -5632,10 +5632,10 @@
|
||||
"Map 2 0b01 0xbf 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd s2, s17, s18, s16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmadd s0, s17, s18, s16",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub231sd xmm0, xmm1, xmm2": {
|
||||
@ -5644,10 +5644,10 @@
|
||||
"Map 2 0b01 0xbf 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd d2, d17, d18, d16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmadd d0, d17, d18, d16",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmaddsub213ps xmm0, xmm1, xmm2": {
|
||||
|
294
unittests/InstructionCountCI/AVX128/VEX_map2_AFP.json
Normal file
294
unittests/InstructionCountCI/AVX128/VEX_map2_AFP.json
Normal file
@ -0,0 +1,294 @@
|
||||
{
|
||||
"Features": {
|
||||
"Bitness": 64,
|
||||
"EnabledHostFeatures": [
|
||||
"AFP"
|
||||
],
|
||||
"DisabledHostFeatures": [
|
||||
"SVE256",
|
||||
"SVE128"
|
||||
]
|
||||
},
|
||||
"Instructions": {
|
||||
"vfmadd132ss xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x99 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd s0, s16, s18, s17",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmadd132sd xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x99 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd d0, d16, d18, d17",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub132ss xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x9b 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub s0, s16, s18, s17",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub132sd xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x9b 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub d0, d16, d18, d17",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd132ss xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x9d 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub s0, s16, s18, s17",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd132sd xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x9d 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub d0, d16, d18, d17",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub132ss xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x9f 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd s0, s16, s18, s17",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub132sd xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x9f 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd d0, d16, d18, d17",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmadd213ss xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xa9 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd s0, s17, s16, s18",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmadd213sd xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xa9 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd d0, d17, d16, d18",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub213ss xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xab 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub s0, s17, s16, s18",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub213sd xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xab 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub d0, d17, d16, d18",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd213ss xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xad 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub s0, s17, s16, s18",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd213sd xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xad 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub d0, d17, d16, d18",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub213ss xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xaf 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd s0, s17, s16, s18",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub213sd xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xaf 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd d0, d17, d16, d18",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmadd231ss xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xb9 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd s16, s17, s18, s16",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmadd231sd xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xb9 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd d16, d17, d18, d16",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub231ss xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xbb 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub s16, s17, s18, s16",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub231sd xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xbb 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub d16, d17, d18, d16",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd231ss xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xbd 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub s16, s17, s18, s16",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd231sd xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xbd 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub d16, d17, d18, d16",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub231ss xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xbf 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd s16, s17, s18, s16",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub231sd xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0xbf 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd d16, d17, d18, d16",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
@ -3044,10 +3044,10 @@
|
||||
"Map 2 0b01 0x99 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd s2, s16, s18, s17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmadd s0, s16, s18, s17",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmadd132sd xmm0, xmm1, xmm2": {
|
||||
@ -3056,10 +3056,10 @@
|
||||
"Map 2 0b01 0x99 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd d2, d16, d18, d17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmadd d0, d16, d18, d17",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub132ps xmm0, xmm1, xmm2": {
|
||||
@ -3126,10 +3126,10 @@
|
||||
"Map 2 0b01 0x9b 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub s2, s16, s18, s17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmsub s0, s16, s18, s17",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub132sd xmm0, xmm1, xmm2": {
|
||||
@ -3138,10 +3138,10 @@
|
||||
"Map 2 0b01 0x9b 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub d2, d16, d18, d17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmsub d0, d16, d18, d17",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd132ps xmm0, xmm1, xmm2": {
|
||||
@ -3208,10 +3208,10 @@
|
||||
"Map 2 0b01 0x9d 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub s2, s16, s18, s17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmsub s0, s16, s18, s17",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd132sd xmm0, xmm1, xmm2": {
|
||||
@ -3220,10 +3220,10 @@
|
||||
"Map 2 0b01 0x9d 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub d2, d16, d18, d17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmsub d0, d16, d18, d17",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub132ps xmm0, xmm1, xmm2": {
|
||||
@ -3290,10 +3290,10 @@
|
||||
"Map 2 0b01 0x9f 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd s2, s16, s18, s17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmadd s0, s16, s18, s17",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub132sd xmm0, xmm1, xmm2": {
|
||||
@ -3302,10 +3302,10 @@
|
||||
"Map 2 0b01 0x9f 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd d2, d16, d18, d17",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmadd d0, d16, d18, d17",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmadd213ps xmm0, xmm1, xmm2": {
|
||||
@ -3372,10 +3372,10 @@
|
||||
"Map 2 0b01 0xa9 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd s2, s17, s16, s18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmadd s0, s17, s16, s18",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmadd213sd xmm0, xmm1, xmm2": {
|
||||
@ -3384,10 +3384,10 @@
|
||||
"Map 2 0b01 0xa9 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd d2, d17, d16, d18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmadd d0, d17, d16, d18",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub213ps xmm0, xmm1, xmm2": {
|
||||
@ -3454,10 +3454,10 @@
|
||||
"Map 2 0b01 0xab 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub s2, s17, s16, s18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmsub s0, s17, s16, s18",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub213sd xmm0, xmm1, xmm2": {
|
||||
@ -3466,10 +3466,10 @@
|
||||
"Map 2 0b01 0xab 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub d2, d17, d16, d18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmsub d0, d17, d16, d18",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd213ps xmm0, xmm1, xmm2": {
|
||||
@ -3536,10 +3536,10 @@
|
||||
"Map 2 0b01 0xad 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub s2, s17, s16, s18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmsub s0, s17, s16, s18",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd213sd xmm0, xmm1, xmm2": {
|
||||
@ -3548,10 +3548,10 @@
|
||||
"Map 2 0b01 0xad 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub d2, d17, d16, d18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmsub d0, d17, d16, d18",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub213ps xmm0, xmm1, xmm2": {
|
||||
@ -3618,10 +3618,10 @@
|
||||
"Map 2 0b01 0xaf 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd s2, s17, s16, s18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmadd s0, s17, s16, s18",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub213sd xmm0, xmm1, xmm2": {
|
||||
@ -3630,10 +3630,10 @@
|
||||
"Map 2 0b01 0xaf 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd d2, d17, d16, d18",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmadd d0, d17, d16, d18",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmadd231ps xmm0, xmm1, xmm2": {
|
||||
@ -3692,10 +3692,10 @@
|
||||
"Map 2 0b01 0xb9 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd s2, s17, s18, s16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmadd s0, s17, s18, s16",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmadd231sd xmm0, xmm1, xmm2": {
|
||||
@ -3704,10 +3704,10 @@
|
||||
"Map 2 0b01 0xb9 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmadd d2, d17, d18, d16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmadd d0, d17, d18, d16",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub231ps xmm0, xmm1, xmm2": {
|
||||
@ -3766,10 +3766,10 @@
|
||||
"Map 2 0b01 0xbb 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub s2, s17, s18, s16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmsub s0, s17, s18, s16",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmsub231sd xmm0, xmm1, xmm2": {
|
||||
@ -3778,10 +3778,10 @@
|
||||
"Map 2 0b01 0xbb 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmsub d2, d17, d18, d16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmsub d0, d17, d18, d16",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd231ps xmm0, xmm1, xmm2": {
|
||||
@ -3840,10 +3840,10 @@
|
||||
"Map 2 0b01 0xbd 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub s2, s17, s18, s16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmsub s0, s17, s18, s16",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmadd231sd xmm0, xmm1, xmm2": {
|
||||
@ -3852,10 +3852,10 @@
|
||||
"Map 2 0b01 0xbd 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fmsub d2, d17, d18, d16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fmsub d0, d17, d18, d16",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub231ps xmm0, xmm1, xmm2": {
|
||||
@ -3914,10 +3914,10 @@
|
||||
"Map 2 0b01 0xbf 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd s2, s17, s18, s16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.s[0], v2.s[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmadd s0, s17, s18, s16",
|
||||
"mov v16.s[0], v0.s[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfnmsub231sd xmm0, xmm1, xmm2": {
|
||||
@ -3926,10 +3926,10 @@
|
||||
"Map 2 0b01 0xbf 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fnmadd d2, d17, d18, d16",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v16.d[0], v2.d[0]",
|
||||
"str q3, [x28, #16]"
|
||||
"fnmadd d0, d17, d18, d16",
|
||||
"mov v16.d[0], v0.d[0]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vfmaddsub213ps xmm0, xmm1, xmm2": {
|
||||
|
Loading…
x
Reference in New Issue
Block a user