mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-11-24 15:10:01 +00:00
Merge pull request #3200 from lioncash/mov
OpcodeDispatcher: Remove unnecessary 128-bit truncating moves from StoreResult
This commit is contained in:
commit
ef321e4bf8
@ -1520,13 +1520,13 @@ DEF_OP(VBroadcastFromMem) {
|
||||
ElementSize == 4 || ElementSize == 8 ||
|
||||
ElementSize == 16, "Invalid element size");
|
||||
|
||||
if (HostSupportsSVE128 || HostSupportsSVE256) {
|
||||
if (Is256Bit) {
|
||||
LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use SVE 256-bit broadcast");
|
||||
}
|
||||
if (Is256Bit && !HostSupportsSVE256) {
|
||||
LOGMAN_MSG_A_FMT("{}: 256-bit vectors must support SVE256", __func__);
|
||||
return;
|
||||
}
|
||||
|
||||
const auto GoverningPredicate = Is256Bit ? PRED_TMP_32B.Zeroing()
|
||||
: PRED_TMP_16B.Zeroing();
|
||||
if (Is256Bit && HostSupportsSVE256) {
|
||||
const auto GoverningPredicate = PRED_TMP_32B.Zeroing();
|
||||
|
||||
switch (ElementSize) {
|
||||
case 1:
|
||||
|
@ -5260,11 +5260,8 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl
|
||||
LOGMAN_THROW_A_FMT(Class != IR::GPRClass, "Partial writes from GPR not allowed. Instruction: {}",
|
||||
Op->TableInfo->Name);
|
||||
|
||||
// OpSize of 16 is special in that it is expected to zero the upper bits of the 256-bit operation.
|
||||
// TODO: Longer term we should enforce the difference between zero and insert.
|
||||
if (VectorSize == Core::CPUState::XMM_AVX_REG_SIZE && OpSize == Core::CPUState::XMM_SSE_REG_SIZE) {
|
||||
Result = _VMov(OpSize, Src);
|
||||
} else {
|
||||
// XMM-size is handled in implementations.
|
||||
if (VectorSize != Core::CPUState::XMM_AVX_REG_SIZE || OpSize != Core::CPUState::XMM_SSE_REG_SIZE) {
|
||||
auto SrcVector = LoadXMMRegister(gprIndex);
|
||||
Result = _VInsElement(VectorSize, OpSize, 0, 0, SrcVector, Src);
|
||||
}
|
||||
@ -5884,12 +5881,12 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
|
||||
|
||||
#define OPD(map_select, pp, opcode) (((map_select - 1) << 10) | (pp << 8) | (opcode))
|
||||
static constexpr std::tuple<uint16_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> AVXTable[] = {
|
||||
{OPD(1, 0b00, 0x10), 1, &OpDispatchBuilder::MOVUPS_MOVUPDOp},
|
||||
{OPD(1, 0b01, 0x10), 1, &OpDispatchBuilder::MOVUPS_MOVUPDOp},
|
||||
{OPD(1, 0b00, 0x10), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp},
|
||||
{OPD(1, 0b01, 0x10), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp},
|
||||
{OPD(1, 0b10, 0x10), 1, &OpDispatchBuilder::VMOVSSOp},
|
||||
{OPD(1, 0b11, 0x10), 1, &OpDispatchBuilder::VMOVSDOp},
|
||||
{OPD(1, 0b00, 0x11), 1, &OpDispatchBuilder::MOVUPS_MOVUPDOp},
|
||||
{OPD(1, 0b01, 0x11), 1, &OpDispatchBuilder::MOVUPS_MOVUPDOp},
|
||||
{OPD(1, 0b00, 0x11), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp},
|
||||
{OPD(1, 0b01, 0x11), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp},
|
||||
{OPD(1, 0b10, 0x11), 1, &OpDispatchBuilder::VMOVSSOp},
|
||||
{OPD(1, 0b11, 0x11), 1, &OpDispatchBuilder::VMOVSDOp},
|
||||
|
||||
@ -5912,10 +5909,10 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
|
||||
{OPD(1, 0b00, 0x17), 1, &OpDispatchBuilder::VMOVHPOp},
|
||||
{OPD(1, 0b01, 0x17), 1, &OpDispatchBuilder::VMOVHPOp},
|
||||
|
||||
{OPD(1, 0b00, 0x28), 1, &OpDispatchBuilder::MOVAPS_MOVAPDOp},
|
||||
{OPD(1, 0b01, 0x28), 1, &OpDispatchBuilder::MOVAPS_MOVAPDOp},
|
||||
{OPD(1, 0b00, 0x29), 1, &OpDispatchBuilder::MOVAPS_MOVAPDOp},
|
||||
{OPD(1, 0b01, 0x29), 1, &OpDispatchBuilder::MOVAPS_MOVAPDOp},
|
||||
{OPD(1, 0b00, 0x28), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},
|
||||
{OPD(1, 0b01, 0x28), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},
|
||||
{OPD(1, 0b00, 0x29), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},
|
||||
{OPD(1, 0b01, 0x29), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},
|
||||
|
||||
{OPD(1, 0b10, 0x2A), 1, &OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<4>},
|
||||
{OPD(1, 0b11, 0x2A), 1, &OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<8>},
|
||||
@ -5970,8 +5967,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
|
||||
{OPD(1, 0b10, 0x59), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFMULSCALARINSERT, 4>},
|
||||
{OPD(1, 0b11, 0x59), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFMULSCALARINSERT, 8>},
|
||||
|
||||
{OPD(1, 0b00, 0x5A), 1, &OpDispatchBuilder::Vector_CVT_Float_To_Float<8, 4>},
|
||||
{OPD(1, 0b01, 0x5A), 1, &OpDispatchBuilder::Vector_CVT_Float_To_Float<4, 8>},
|
||||
{OPD(1, 0b00, 0x5A), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Float<8, 4>},
|
||||
{OPD(1, 0b01, 0x5A), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Float<4, 8>},
|
||||
{OPD(1, 0b10, 0x5A), 1, &OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<8, 4>},
|
||||
{OPD(1, 0b11, 0x5A), 1, &OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<4, 8>},
|
||||
|
||||
@ -6015,8 +6012,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
|
||||
{OPD(1, 0b01, 0x6D), 1, &OpDispatchBuilder::VPUNPCKHOp<8>},
|
||||
{OPD(1, 0b01, 0x6E), 1, &OpDispatchBuilder::MOVBetweenGPR_FPR},
|
||||
|
||||
{OPD(1, 0b01, 0x6F), 1, &OpDispatchBuilder::MOVAPS_MOVAPDOp},
|
||||
{OPD(1, 0b10, 0x6F), 1, &OpDispatchBuilder::MOVUPS_MOVUPDOp},
|
||||
{OPD(1, 0b01, 0x6F), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},
|
||||
{OPD(1, 0b10, 0x6F), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp},
|
||||
|
||||
{OPD(1, 0b01, 0x70), 1, &OpDispatchBuilder::VPSHUFWOp<4, true>},
|
||||
{OPD(1, 0b10, 0x70), 1, &OpDispatchBuilder::VPSHUFWOp<2, false>},
|
||||
@ -6036,8 +6033,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
|
||||
{OPD(1, 0b01, 0x7E), 1, &OpDispatchBuilder::MOVBetweenGPR_FPR},
|
||||
{OPD(1, 0b10, 0x7E), 1, &OpDispatchBuilder::MOVQOp},
|
||||
|
||||
{OPD(1, 0b01, 0x7F), 1, &OpDispatchBuilder::MOVAPS_MOVAPDOp},
|
||||
{OPD(1, 0b10, 0x7F), 1, &OpDispatchBuilder::MOVUPS_MOVUPDOp},
|
||||
{OPD(1, 0b01, 0x7F), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},
|
||||
{OPD(1, 0b10, 0x7F), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp},
|
||||
|
||||
{OPD(1, 0b00, 0xC2), 1, &OpDispatchBuilder::AVXVFCMPOp<4>},
|
||||
{OPD(1, 0b01, 0xC2), 1, &OpDispatchBuilder::AVXVFCMPOp<8>},
|
||||
|
@ -482,6 +482,9 @@ public:
|
||||
template<FEXCore::IR::IROps IROp, size_t ElementSize>
|
||||
void AVXVectorScalarUnaryInsertALUOp(OpcodeArgs);
|
||||
|
||||
template<size_t DstElementSize, size_t SrcElementSize>
|
||||
void AVXVector_CVT_Float_To_Float(OpcodeArgs);
|
||||
|
||||
void InsertMMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs);
|
||||
template<size_t DstElementSize>
|
||||
void InsertCVTGPR_To_FPR(OpcodeArgs);
|
||||
@ -552,6 +555,9 @@ public:
|
||||
void VMOVSDOp(OpcodeArgs);
|
||||
void VMOVSSOp(OpcodeArgs);
|
||||
|
||||
void VMOVAPS_VMOVAPDOp(OpcodeArgs);
|
||||
void VMOVUPS_VMOVUPDOp(OpcodeArgs);
|
||||
|
||||
void VMPSADBWOp(OpcodeArgs);
|
||||
|
||||
template <size_t ElementSize>
|
||||
@ -1121,7 +1127,7 @@ private:
|
||||
const X86Tables::DecodedOperand& Src1Op,
|
||||
const X86Tables::DecodedOperand& Src2Op);
|
||||
|
||||
void Vector_CVT_Float_To_FloatImpl(OpcodeArgs, size_t DstElementSize, size_t SrcElementSize);
|
||||
void Vector_CVT_Float_To_FloatImpl(OpcodeArgs, size_t DstElementSize, size_t SrcElementSize, bool IsAVX);
|
||||
|
||||
OrderedNode* Vector_CVT_Float_To_IntImpl(OpcodeArgs, size_t SrcElementSize, bool Narrow, bool HostRoundingMode);
|
||||
|
||||
|
@ -45,11 +45,35 @@ void OpDispatchBuilder::MOVAPS_MOVAPDOp(OpcodeArgs) {
|
||||
StoreResult(FPRClass, Op, Src, -1);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::VMOVAPS_VMOVAPDOp(OpcodeArgs) {
|
||||
const auto SrcSize = GetSrcSize(Op);
|
||||
const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
|
||||
if (Is128Bit && Op->Dest.IsGPR()) {
|
||||
Src = _VMov(16, Src);
|
||||
}
|
||||
StoreResult(FPRClass, Op, Src, -1);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::MOVUPS_MOVUPDOp(OpcodeArgs) {
|
||||
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1});
|
||||
StoreResult(FPRClass, Op, Src, 1);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::VMOVUPS_VMOVUPDOp(OpcodeArgs) {
|
||||
const auto SrcSize = GetSrcSize(Op);
|
||||
const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1});
|
||||
|
||||
if (Is128Bit && Op->Dest.IsGPR()) {
|
||||
Src = _VMov(16, Src);
|
||||
}
|
||||
StoreResult(FPRClass, Op, Src, 1);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::MOVHPDOp(OpcodeArgs) {
|
||||
if (Op->Dest.IsGPR()) {
|
||||
if (Op->Src[0].IsGPR()) {
|
||||
@ -1810,18 +1834,26 @@ void OpDispatchBuilder::PINSROp<8>(OpcodeArgs);
|
||||
|
||||
void OpDispatchBuilder::VPINSRBOp(OpcodeArgs) {
|
||||
OrderedNode *Result = PINSROpImpl(Op, 1, Op->Src[0], Op->Src[1], Op->Src[2]);
|
||||
if (Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR) {
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::VPINSRDQOp(OpcodeArgs) {
|
||||
const auto SrcSize = GetSrcSize(Op);
|
||||
OrderedNode *Result = PINSROpImpl(Op, SrcSize, Op->Src[0], Op->Src[1], Op->Src[2]);
|
||||
|
||||
if (Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR) {
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::VPINSRWOp(OpcodeArgs) {
|
||||
OrderedNode *Result = PINSROpImpl(Op, 2, Op->Src[0], Op->Src[1], Op->Src[2]);
|
||||
if (Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR) {
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
@ -2026,10 +2058,16 @@ void OpDispatchBuilder::PSRLDOp<8>(OpcodeArgs);
|
||||
|
||||
template <size_t ElementSize>
|
||||
void OpDispatchBuilder::VPSRLDOp(OpcodeArgs) {
|
||||
const auto DstSize = GetDstSize(Op);
|
||||
const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
OrderedNode *Shift = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
OrderedNode *Result = PSRLDOpImpl(Op, ElementSize, Src, Shift);
|
||||
|
||||
if (Is128Bit) {
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
@ -2066,6 +2104,7 @@ void OpDispatchBuilder::PSRLI<8>(OpcodeArgs);
|
||||
template <size_t ElementSize>
|
||||
void OpDispatchBuilder::VPSRLIOp(OpcodeArgs) {
|
||||
const auto Size = GetSrcSize(Op);
|
||||
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here");
|
||||
const uint64_t ShiftConstant = Op->Src[1].Data.Literal.Value;
|
||||
@ -2075,6 +2114,10 @@ void OpDispatchBuilder::VPSRLIOp(OpcodeArgs) {
|
||||
|
||||
if (ShiftConstant != 0) [[likely]] {
|
||||
Result = _VUShrI(Size, ElementSize, Src, ShiftConstant);
|
||||
} else {
|
||||
if (Is128Bit) {
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
}
|
||||
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
@ -2123,9 +2166,15 @@ template <size_t ElementSize>
|
||||
void OpDispatchBuilder::VPSLLIOp(OpcodeArgs) {
|
||||
LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here");
|
||||
const uint64_t ShiftConstant = Op->Src[1].Data.Literal.Value;
|
||||
const auto DstSize = GetDstSize(Op);
|
||||
const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
OrderedNode *Result = PSLLIImpl(Op, ElementSize, Src, ShiftConstant);
|
||||
if (ShiftConstant == 0 && Is128Bit) {
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
@ -2162,10 +2211,16 @@ void OpDispatchBuilder::PSLL<8>(OpcodeArgs);
|
||||
|
||||
template <size_t ElementSize>
|
||||
void OpDispatchBuilder::VPSLLOp(OpcodeArgs) {
|
||||
const auto DstSize = GetDstSize(Op);
|
||||
const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
OrderedNode *Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], 16, Op->Flags);
|
||||
OrderedNode *Result = PSLLImpl(Op, ElementSize, Src1, Src2);
|
||||
|
||||
if (Is128Bit) {
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
@ -2200,10 +2255,16 @@ void OpDispatchBuilder::PSRAOp<4>(OpcodeArgs);
|
||||
|
||||
template <size_t ElementSize>
|
||||
void OpDispatchBuilder::VPSRAOp(OpcodeArgs) {
|
||||
const auto DstSize = GetDstSize(Op);
|
||||
const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
OrderedNode *Result = PSRAOpImpl(Op, ElementSize, Src1, Src2);
|
||||
|
||||
if (Is128Bit) {
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
@ -2242,7 +2303,11 @@ void OpDispatchBuilder::VPSRLDQOp(OpcodeArgs) {
|
||||
|
||||
OrderedNode *Result{};
|
||||
if (Shift == 0) [[unlikely]] {
|
||||
Result = Src;
|
||||
if (Is128Bit) {
|
||||
Result = _VMov(16, Src);
|
||||
} else {
|
||||
Result = Src;
|
||||
}
|
||||
} else {
|
||||
Result = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO);
|
||||
|
||||
@ -2292,7 +2357,12 @@ void OpDispatchBuilder::VPSLLDQOp(OpcodeArgs) {
|
||||
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
|
||||
OrderedNode *Result = Src;
|
||||
if (Shift != 0) {
|
||||
|
||||
if (Shift == 0) {
|
||||
if (Is128Bit) {
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
} else {
|
||||
Result = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO);
|
||||
if (Is128Bit) {
|
||||
if (Shift < DstSize) {
|
||||
@ -2336,12 +2406,17 @@ void OpDispatchBuilder::VPSRAIOp(OpcodeArgs) {
|
||||
LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here");
|
||||
const uint64_t Shift = Op->Src[1].Data.Literal.Value;
|
||||
const auto Size = GetDstSize(Op);
|
||||
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
OrderedNode *Result = Src;
|
||||
|
||||
if (Shift != 0) [[likely]] {
|
||||
Result = _VSShrI(Size, ElementSize, Src, Shift);
|
||||
} else {
|
||||
if (Is128Bit) {
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
}
|
||||
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
@ -2637,12 +2712,12 @@ void OpDispatchBuilder::AVXScalar_CVT_Float_To_Float<4, 8>(OpcodeArgs);
|
||||
template
|
||||
void OpDispatchBuilder::AVXScalar_CVT_Float_To_Float<8, 4>(OpcodeArgs);
|
||||
|
||||
void OpDispatchBuilder::Vector_CVT_Float_To_FloatImpl(OpcodeArgs, size_t DstElementSize, size_t SrcElementSize) {
|
||||
const auto IsFloatSrc = SrcElementSize == 4;
|
||||
|
||||
void OpDispatchBuilder::Vector_CVT_Float_To_FloatImpl(OpcodeArgs, size_t DstElementSize, size_t SrcElementSize, bool IsAVX) {
|
||||
const auto SrcSize = GetSrcSize(Op);
|
||||
const auto StoreSize = IsFloatSrc ? SrcSize
|
||||
: 16;
|
||||
|
||||
const auto IsFloatSrc = SrcElementSize == 4;
|
||||
const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
const auto LoadSize = IsFloatSrc && !Op->Src[0].IsGPR() ?
|
||||
SrcSize / 2 :
|
||||
SrcSize;
|
||||
@ -2656,19 +2731,36 @@ void OpDispatchBuilder::Vector_CVT_Float_To_FloatImpl(OpcodeArgs, size_t DstElem
|
||||
Result = _Vector_FToF(SrcSize, SrcElementSize >> 1, Src, SrcElementSize);
|
||||
}
|
||||
|
||||
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, StoreSize, -1);
|
||||
if (IsAVX) {
|
||||
if (!IsFloatSrc && !Is128Bit) {
|
||||
// VCVTPD2PS path
|
||||
Result = _VMov(16, Result);
|
||||
} else if (IsFloatSrc && Is128Bit) {
|
||||
// VCVTPS2PD path
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
}
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
template<size_t DstElementSize, size_t SrcElementSize>
|
||||
void OpDispatchBuilder::Vector_CVT_Float_To_Float(OpcodeArgs) {
|
||||
Vector_CVT_Float_To_FloatImpl(Op, DstElementSize, SrcElementSize);
|
||||
Vector_CVT_Float_To_FloatImpl(Op, DstElementSize, SrcElementSize, false);
|
||||
}
|
||||
|
||||
template
|
||||
void OpDispatchBuilder::Vector_CVT_Float_To_Float<4, 8>(OpcodeArgs);
|
||||
template
|
||||
void OpDispatchBuilder::Vector_CVT_Float_To_Float<8, 4>(OpcodeArgs);
|
||||
|
||||
template<size_t DstElementSize, size_t SrcElementSize>
|
||||
void OpDispatchBuilder::AVXVector_CVT_Float_To_Float(OpcodeArgs) {
|
||||
Vector_CVT_Float_To_FloatImpl(Op, DstElementSize, SrcElementSize, true);
|
||||
}
|
||||
template
|
||||
void OpDispatchBuilder::AVXVector_CVT_Float_To_Float<4, 8>(OpcodeArgs);
|
||||
template
|
||||
void OpDispatchBuilder::AVXVector_CVT_Float_To_Float<8, 4>(OpcodeArgs);
|
||||
|
||||
void OpDispatchBuilder::MMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs) {
|
||||
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
|
||||
@ -2760,8 +2852,14 @@ void OpDispatchBuilder::VMASKMOVOpImpl(OpcodeArgs, size_t ElementSize, size_t Da
|
||||
OrderedNode *Address = MakeAddress(Op->Dest);
|
||||
_VStoreVectorMasked(DataSize, ElementSize, Mask, Data, Address, Invalid(), MEM_OFFSET_SXTX, 1);
|
||||
} else {
|
||||
const auto Is128Bit = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
OrderedNode *Address = MakeAddress(DataOp);
|
||||
OrderedNode *Result = _VLoadVectorMasked(DataSize, ElementSize, Mask, Address, Invalid(), MEM_OFFSET_SXTX, 1);
|
||||
|
||||
if (Is128Bit) {
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
}
|
||||
@ -3823,10 +3921,16 @@ void OpDispatchBuilder::PMULHW<true>(OpcodeArgs);
|
||||
|
||||
template <bool Signed>
|
||||
void OpDispatchBuilder::VPMULHWOp(OpcodeArgs) {
|
||||
const auto DstSize = GetDstSize(Op);
|
||||
const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
OrderedNode *Result = PMULHWOpImpl(Op, Signed, Dest, Src);
|
||||
|
||||
if (Is128Bit) {
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
@ -4906,12 +5010,14 @@ void OpDispatchBuilder::VBLENDPDOp(OpcodeArgs) {
|
||||
const auto Selector = Op->Src[2].Data.Literal.Value;
|
||||
|
||||
if (Selector == 0) {
|
||||
StoreResult(FPRClass, Op, Src1, -1);
|
||||
OrderedNode *Result = Is256Bit ? Src1 : _VMov(16, Src1);
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
return;
|
||||
}
|
||||
// Only the first four bits of the 8-bit immediate are used, so only check them.
|
||||
if (((Selector & 0b11) == 0b11 && !Is256Bit) || (Selector & 0b1111) == 0b1111) {
|
||||
StoreResult(FPRClass, Op, Src2, -1);
|
||||
OrderedNode *Result = Is256Bit ? Src2 : _VMov(16, Src2);
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -4940,11 +5046,13 @@ void OpDispatchBuilder::VPBLENDDOp(OpcodeArgs) {
|
||||
// silly is happening, we have your back.
|
||||
|
||||
if (Selector == 0) {
|
||||
StoreResult(FPRClass, Op, Src1, -1);
|
||||
OrderedNode* Result = Is256Bit ? Src1 : _VMov(16, Src1);
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
return;
|
||||
}
|
||||
if (Selector == 0xFF && Is256Bit) {
|
||||
StoreResult(FPRClass, Op, Src2, -1);
|
||||
OrderedNode* Result = Is256Bit ? Src2 : _VMov(16, Src2);
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
return;
|
||||
}
|
||||
// The only bits we care about from the 8-bit immediate for 128-bit operations
|
||||
@ -4952,17 +5060,21 @@ void OpDispatchBuilder::VPBLENDDOp(OpcodeArgs) {
|
||||
// silliness is going on and the upper bits are being set even when they'll
|
||||
// be ignored
|
||||
if ((Selector & 0xF) == 0xF && !Is256Bit) {
|
||||
StoreResult(FPRClass, Op, Src2, -1);
|
||||
StoreResult(FPRClass, Op, _VMov(16, Src2), -1);
|
||||
return;
|
||||
}
|
||||
|
||||
const auto ZeroRegister = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO);
|
||||
OrderedNode *Result = VBLENDOpImpl(*this, DstSize, 4, Src1, Src2, ZeroRegister, Selector);
|
||||
if (!Is256Bit) {
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::VPBLENDWOp(OpcodeArgs) {
|
||||
const auto DstSize = GetDstSize(Op);
|
||||
const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
|
||||
OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
|
||||
@ -4971,11 +5083,13 @@ void OpDispatchBuilder::VPBLENDWOp(OpcodeArgs) {
|
||||
const auto Selector = Op->Src[2].Data.Literal.Value;
|
||||
|
||||
if (Selector == 0) {
|
||||
StoreResult(FPRClass, Op, Src1, -1);
|
||||
OrderedNode *Result = Is128Bit ? _VMov(16, Src1) : Src1;
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
return;
|
||||
}
|
||||
if (Selector == 0xFF) {
|
||||
StoreResult(FPRClass, Op, Src2, -1);
|
||||
OrderedNode *Result = Is128Bit ? _VMov(16, Src2) : Src2;
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -4986,6 +5100,9 @@ void OpDispatchBuilder::VPBLENDWOp(OpcodeArgs) {
|
||||
|
||||
const auto ZeroRegister = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO);
|
||||
OrderedNode *Result = VBLENDOpImpl(*this, DstSize, 2, Src1, Src2, ZeroRegister, NewSelector);
|
||||
if (Is128Bit) {
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
|
@ -435,6 +435,58 @@
|
||||
"mov v16.16b, v17.16b",
|
||||
"fmax d16, d17, d18"
|
||||
]
|
||||
},
|
||||
"vminps xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b00 0x5d 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fcmgt v0.4s, v18.4s, v17.4s",
|
||||
"mov v16.16b, v17.16b",
|
||||
"bif v16.16b, v18.16b, v0.16b"
|
||||
]
|
||||
},
|
||||
"vminps ymm0, ymm1, ymm2": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b00 0x5d 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fcmgt p0.s, p7/z, z18.s, z17.s",
|
||||
"not p0.b, p7/z, p0.b",
|
||||
"mov z0.d, z17.d",
|
||||
"mov z0.s, p0/m, z18.s",
|
||||
"mov z16.d, z0.d"
|
||||
]
|
||||
},
|
||||
"vminpd xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b01 0x5d 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fcmgt v0.2d, v18.2d, v17.2d",
|
||||
"mov v16.16b, v17.16b",
|
||||
"bif v16.16b, v18.16b, v0.16b"
|
||||
]
|
||||
},
|
||||
"vminpd ymm0, ymm1, ymm2": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b01 0x5d 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fcmgt p0.d, p7/z, z18.d, z17.d",
|
||||
"not p0.b, p7/z, p0.b",
|
||||
"mov z0.d, z17.d",
|
||||
"mov z0.d, p0/m, z18.d",
|
||||
"mov z16.d, z0.d"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -12,14 +12,13 @@
|
||||
},
|
||||
"Instructions": {
|
||||
"vrsqrtps xmm0, xmm1": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 1 0b00 0x52 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"frsqrte v2.4s, v17.4s",
|
||||
"mov v16.16b, v2.16b"
|
||||
"frsqrte v16.4s, v17.4s"
|
||||
]
|
||||
},
|
||||
"vrsqrtps ymm0, ymm1": {
|
||||
@ -46,14 +45,13 @@
|
||||
]
|
||||
},
|
||||
"vrcpps xmm0, xmm1": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 1 0b00 0x53 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"frecpe v2.4s, v17.4s",
|
||||
"mov v16.16b, v2.16b"
|
||||
"frecpe v16.4s, v17.4s"
|
||||
]
|
||||
},
|
||||
"vrcpps ymm0, ymm1": {
|
||||
|
@ -11,14 +11,13 @@
|
||||
},
|
||||
"Instructions": {
|
||||
"vrsqrtps xmm0, xmm1": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 1 0b00 0x52 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"frsqrte v2.4s, v17.4s",
|
||||
"mov v16.16b, v2.16b"
|
||||
"frsqrte v16.4s, v17.4s"
|
||||
]
|
||||
},
|
||||
"vrsqrtps ymm0, ymm1": {
|
||||
@ -44,14 +43,13 @@
|
||||
]
|
||||
},
|
||||
"vrcpps xmm0, xmm1": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 1 0b00 0x53 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"frecpe v2.4s, v17.4s",
|
||||
"mov v16.16b, v2.16b"
|
||||
"frecpe v16.4s, v17.4s"
|
||||
]
|
||||
},
|
||||
"vrcpps ymm0, ymm1": {
|
||||
|
@ -9,29 +9,25 @@
|
||||
},
|
||||
"Instructions": {
|
||||
"pmulhuw xmm0, xmm1": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"SVE-256bit changes behaviour slightly",
|
||||
"0x66 0x0f 0xe4"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movprfx z2, z16",
|
||||
"umulh z2.h, p6/m, z2.h, z17.h",
|
||||
"mov v16.16b, v2.16b"
|
||||
"umulh z16.h, p6/m, z16.h, z17.h"
|
||||
]
|
||||
},
|
||||
"pmulhw xmm0, xmm1": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"SVE-256bit changes behaviour slightly",
|
||||
"0x66 0x0f 0xe5"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movprfx z2, z16",
|
||||
"smulh z2.h, p6/m, z2.h, z17.h",
|
||||
"mov v16.16b, v2.16b"
|
||||
"smulh z16.h, p6/m, z16.h, z17.h"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -10,15 +10,14 @@
|
||||
},
|
||||
"Instructions": {
|
||||
"vaddsubpd xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 1 0b01 0xd0 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ext v2.16b, v18.16b, v18.16b, #8",
|
||||
"fcadd v2.2d, v17.2d, v2.2d, #90",
|
||||
"mov v16.16b, v2.16b"
|
||||
"fcadd v16.2d, v17.2d, v2.2d, #90"
|
||||
]
|
||||
},
|
||||
"vaddsubpd ymm0, ymm1, ymm2": {
|
||||
@ -37,15 +36,14 @@
|
||||
]
|
||||
},
|
||||
"vaddsubps xmm0, xmm1, xmm2": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b11 0xd0 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"rev64 v2.4s, v18.4s",
|
||||
"fcadd v2.4s, v17.4s, v2.4s, #90",
|
||||
"mov v16.16b, v2.16b"
|
||||
"fcadd v16.4s, v17.4s, v2.4s, #90"
|
||||
]
|
||||
},
|
||||
"vaddsubps ymm0, ymm1, ymm2": {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -437,8 +437,8 @@
|
||||
"mov v2.s[0], v17.s[0]",
|
||||
"mov v2.s[1], v17.s[0]",
|
||||
"mov v2.s[2], v17.s[0]",
|
||||
"mov v2.s[3], v17.s[0]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"mov v16.s[3], v17.s[0]"
|
||||
]
|
||||
},
|
||||
"vpermilps xmm0, xmm1, 01010101b": {
|
||||
@ -452,8 +452,8 @@
|
||||
"mov v2.s[0], v17.s[1]",
|
||||
"mov v2.s[1], v17.s[1]",
|
||||
"mov v2.s[2], v17.s[1]",
|
||||
"mov v2.s[3], v17.s[1]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"mov v16.s[3], v17.s[1]"
|
||||
]
|
||||
},
|
||||
"vpermilps xmm0, xmm1, 10101010b": {
|
||||
@ -467,8 +467,8 @@
|
||||
"mov v2.s[0], v17.s[2]",
|
||||
"mov v2.s[1], v17.s[2]",
|
||||
"mov v2.s[2], v17.s[2]",
|
||||
"mov v2.s[3], v17.s[2]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"mov v16.s[3], v17.s[2]"
|
||||
]
|
||||
},
|
||||
"vpermilps xmm0, xmm1, 11111111b": {
|
||||
@ -482,8 +482,8 @@
|
||||
"mov v2.s[0], v17.s[3]",
|
||||
"mov v2.s[1], v17.s[3]",
|
||||
"mov v2.s[2], v17.s[3]",
|
||||
"mov v2.s[3], v17.s[3]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"mov v16.s[3], v17.s[3]"
|
||||
]
|
||||
},
|
||||
"vpermilps ymm0, ymm1, 00000000b": {
|
||||
@ -667,8 +667,8 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v2.d[0], v17.d[0]",
|
||||
"mov v2.d[1], v17.d[0]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"mov v16.d[1], v17.d[0]"
|
||||
]
|
||||
},
|
||||
"vpermilpd xmm0, xmm1, 01b": {
|
||||
@ -680,8 +680,8 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v2.d[0], v17.d[1]",
|
||||
"mov v2.d[1], v17.d[0]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"mov v16.d[1], v17.d[0]"
|
||||
]
|
||||
},
|
||||
"vpermilpd xmm0, xmm1, 10b": {
|
||||
@ -693,8 +693,8 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v2.d[0], v17.d[0]",
|
||||
"mov v2.d[1], v17.d[1]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"mov v16.d[1], v17.d[1]"
|
||||
]
|
||||
},
|
||||
"vpermilpd xmm0, xmm1, 11b": {
|
||||
@ -706,8 +706,8 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v2.d[0], v17.d[1]",
|
||||
"mov v2.d[1], v17.d[1]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"mov v16.d[1], v17.d[1]"
|
||||
]
|
||||
},
|
||||
"vpermilpd ymm0, ymm1, 0000b": {
|
||||
@ -1517,63 +1517,58 @@
|
||||
]
|
||||
},
|
||||
"vroundps xmm0, xmm1, 00000000b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"nearest rounding",
|
||||
"Map 3 0b01 0x08 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"frintn v2.4s, v17.4s",
|
||||
"mov v16.16b, v2.16b"
|
||||
"frintn v16.4s, v17.4s"
|
||||
]
|
||||
},
|
||||
"vroundps xmm0, xmm1, 00000001b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"-inf rounding",
|
||||
"Map 3 0b01 0x08 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"frintm v2.4s, v17.4s",
|
||||
"mov v16.16b, v2.16b"
|
||||
"frintm v16.4s, v17.4s"
|
||||
]
|
||||
},
|
||||
"vroundps xmm0, xmm1, 00000010b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"+inf rounding",
|
||||
"Map 3 0b01 0x08 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"frintp v2.4s, v17.4s",
|
||||
"mov v16.16b, v2.16b"
|
||||
"frintp v16.4s, v17.4s"
|
||||
]
|
||||
},
|
||||
"vroundps xmm0, xmm1, 00000011b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"truncate rounding",
|
||||
"Map 3 0b01 0x08 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"frintz v2.4s, v17.4s",
|
||||
"mov v16.16b, v2.16b"
|
||||
"frintz v16.4s, v17.4s"
|
||||
]
|
||||
},
|
||||
"vroundps xmm0, xmm1, 00000100b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"host mode rounding",
|
||||
"Map 3 0b01 0x08 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"frinti v2.4s, v17.4s",
|
||||
"mov v16.16b, v2.16b"
|
||||
"frinti v16.4s, v17.4s"
|
||||
]
|
||||
},
|
||||
"vroundps ymm0, ymm1, 00000000b": {
|
||||
@ -1632,63 +1627,58 @@
|
||||
]
|
||||
},
|
||||
"vroundpd xmm0, xmm1, 00000000b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"nearest rounding",
|
||||
"Map 3 0b01 0x09 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"frintn v2.2d, v17.2d",
|
||||
"mov v16.16b, v2.16b"
|
||||
"frintn v16.2d, v17.2d"
|
||||
]
|
||||
},
|
||||
"vroundpd xmm0, xmm1, 00000001b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"-inf rounding",
|
||||
"Map 3 0b01 0x09 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"frintm v2.2d, v17.2d",
|
||||
"mov v16.16b, v2.16b"
|
||||
"frintm v16.2d, v17.2d"
|
||||
]
|
||||
},
|
||||
"vroundpd xmm0, xmm1, 00000010b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"+inf rounding",
|
||||
"Map 3 0b01 0x09 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"frintp v2.2d, v17.2d",
|
||||
"mov v16.16b, v2.16b"
|
||||
"frintp v16.2d, v17.2d"
|
||||
]
|
||||
},
|
||||
"vroundpd xmm0, xmm1, 00000011b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"truncate rounding",
|
||||
"Map 3 0b01 0x09 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"frintz v2.2d, v17.2d",
|
||||
"mov v16.16b, v2.16b"
|
||||
"frintz v16.2d, v17.2d"
|
||||
]
|
||||
},
|
||||
"vroundpd xmm0, xmm1, 00000100b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"host mode rounding",
|
||||
"Map 3 0b01 0x09 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"frinti v2.2d, v17.2d",
|
||||
"mov v16.16b, v2.16b"
|
||||
"frinti v16.2d, v17.2d"
|
||||
]
|
||||
},
|
||||
"vroundpd ymm0, ymm1, 00000000b": {
|
||||
@ -1748,7 +1738,7 @@
|
||||
},
|
||||
"vroundss xmm0, xmm1, 00000000b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"nearest rounding",
|
||||
"Map 3 0b01 0x0a 128-bit"
|
||||
@ -1761,7 +1751,7 @@
|
||||
},
|
||||
"vroundss xmm0, xmm1, 00000001b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"-inf rounding",
|
||||
"Map 3 0b01 0x0a 128-bit"
|
||||
@ -1774,7 +1764,7 @@
|
||||
},
|
||||
"vroundss xmm0, xmm1, 00000010b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"+inf rounding",
|
||||
"Map 3 0b01 0x0a 128-bit"
|
||||
@ -1787,7 +1777,7 @@
|
||||
},
|
||||
"vroundss xmm0, xmm1, 00000011b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"truncate rounding",
|
||||
"Map 3 0b01 0x0a 128-bit"
|
||||
@ -1800,7 +1790,7 @@
|
||||
},
|
||||
"vroundss xmm0, xmm1, 00000100b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"host mode rounding",
|
||||
"Map 3 0b01 0x0a 128-bit"
|
||||
@ -1813,7 +1803,7 @@
|
||||
},
|
||||
"vroundsd xmm0, xmm1, 00000000b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"nearest rounding",
|
||||
"Map 3 0b01 0x0b 128-bit"
|
||||
@ -1826,7 +1816,7 @@
|
||||
},
|
||||
"vroundsd xmm0, xmm1, 00000001b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"-inf rounding",
|
||||
"Map 3 0b01 0x0b 128-bit"
|
||||
@ -1839,7 +1829,7 @@
|
||||
},
|
||||
"vroundsd xmm0, xmm1, 00000010b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"+inf rounding",
|
||||
"Map 3 0b01 0x0b 128-bit"
|
||||
@ -1852,7 +1842,7 @@
|
||||
},
|
||||
"vroundsd xmm0, xmm1, 00000011b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"truncate rounding",
|
||||
"Map 3 0b01 0x0b 128-bit"
|
||||
@ -1865,7 +1855,7 @@
|
||||
},
|
||||
"vroundsd xmm0, xmm1, 00000100b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"host mode rounding",
|
||||
"Map 3 0b01 0x0b 128-bit"
|
||||
@ -1993,8 +1983,8 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v2.d[0], v18.d[0]",
|
||||
"mov v2.d[1], v17.d[1]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"mov v16.d[1], v17.d[1]"
|
||||
]
|
||||
},
|
||||
"vblendpd xmm0, xmm1, xmm2, 10b": {
|
||||
@ -2006,8 +1996,8 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v2.d[0], v17.d[0]",
|
||||
"mov v2.d[1], v18.d[1]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"mov v16.d[1], v18.d[1]"
|
||||
]
|
||||
},
|
||||
"vblendpd xmm0, xmm1, xmm2, 11b": {
|
||||
@ -2553,48 +2543,44 @@
|
||||
]
|
||||
},
|
||||
"vpalignr xmm0, xmm1, xmm2, 0": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x0f 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ext v2.16b, v18.16b, v17.16b, #0",
|
||||
"mov v16.16b, v2.16b"
|
||||
"ext v16.16b, v18.16b, v17.16b, #0"
|
||||
]
|
||||
},
|
||||
"vpalignr xmm0, xmm1, xmm2, 1": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x0f 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ext v2.16b, v18.16b, v17.16b, #1",
|
||||
"mov v16.16b, v2.16b"
|
||||
"ext v16.16b, v18.16b, v17.16b, #1"
|
||||
]
|
||||
},
|
||||
"vpalignr xmm0, xmm1, xmm2, 15": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x0f 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ext v2.16b, v18.16b, v17.16b, #15",
|
||||
"mov v16.16b, v2.16b"
|
||||
"ext v16.16b, v18.16b, v17.16b, #15"
|
||||
]
|
||||
},
|
||||
"vpalignr xmm0, xmm1, xmm2, 16": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x0f 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v0.2d, #0x0",
|
||||
"ext v2.16b, v17.16b, v0.16b, #0",
|
||||
"mov v16.16b, v2.16b"
|
||||
"ext v16.16b, v17.16b, v0.16b, #0"
|
||||
]
|
||||
},
|
||||
"vpalignr ymm0, ymm1, ymm2, 0": {
|
||||
@ -2959,111 +2945,138 @@
|
||||
"Map 3 0b01 0x1D 256-bit"
|
||||
]
|
||||
},
|
||||
"vpinsrb xmm0, xmm1, eax, 0": {
|
||||
"vpinsrb xmm0, xmm0, eax, 0": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x20 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v2.16b, v17.16b",
|
||||
"mov v2.16b, v16.16b",
|
||||
"mov v2.b[0], w4",
|
||||
"mov v16.16b, v2.16b"
|
||||
]
|
||||
},
|
||||
"vpinsrb xmm0, xmm1, eax, 15": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"vpinsrb xmm0, xmm1, eax, 0": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x20 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v2.16b, v17.16b",
|
||||
"mov v2.b[15], w4",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v17.16b",
|
||||
"mov v16.b[0], w4"
|
||||
]
|
||||
},
|
||||
"vpinsrb xmm0, xmm1, eax, 15": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x20 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v16.16b, v17.16b",
|
||||
"mov v16.b[15], w4"
|
||||
]
|
||||
},
|
||||
"vinsertps xmm0, xmm1, xmm2, ((0b00 << 6) | (0b00 << 4) | (0b0000))": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x21 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v2.16b, v17.16b",
|
||||
"mov v2.s[0], v18.s[0]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v17.16b",
|
||||
"mov v16.s[0], v18.s[0]"
|
||||
]
|
||||
},
|
||||
"vinsertps xmm0, xmm1, xmm2, ((0b00 << 6) | (0b00 << 4) | (0b1111))": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x21 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v16.16b, v2.16b"
|
||||
"movi v16.2d, #0x0"
|
||||
]
|
||||
},
|
||||
"vinsertps xmm0, xmm1, xmm2, ((0b11 << 6) | (0b11 << 4) | (0b0000))": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x21 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v2.16b, v17.16b",
|
||||
"mov v2.s[3], v18.s[3]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v17.16b",
|
||||
"mov v16.s[3], v18.s[3]"
|
||||
]
|
||||
},
|
||||
"vpinsrd xmm0, xmm1, eax, 0": {
|
||||
"vpinsrd xmm0, xmm0, eax, 0": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x22 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v2.16b, v17.16b",
|
||||
"mov v2.16b, v16.16b",
|
||||
"mov v2.s[0], w4",
|
||||
"mov v16.16b, v2.16b"
|
||||
]
|
||||
},
|
||||
"vpinsrd xmm0, xmm1, eax, 3": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"vpinsrd xmm0, xmm1, eax, 0": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x22 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v2.16b, v17.16b",
|
||||
"mov v2.s[3], w4",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v17.16b",
|
||||
"mov v16.s[0], w4"
|
||||
]
|
||||
},
|
||||
"vpinsrq xmm0, xmm1, rax, 0": {
|
||||
"vpinsrd xmm0, xmm1, eax, 3": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x22 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v16.16b, v17.16b",
|
||||
"mov v16.s[3], w4"
|
||||
]
|
||||
},
|
||||
"vpinsrq xmm0, xmm0, rax, 0": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x22 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v2.16b, v17.16b",
|
||||
"mov v2.16b, v16.16b",
|
||||
"mov v2.d[0], x4",
|
||||
"mov v16.16b, v2.16b"
|
||||
]
|
||||
},
|
||||
"vpinsrq xmm0, xmm1, rax, 1": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"vpinsrq xmm0, xmm1, rax, 0": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x22 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v2.16b, v17.16b",
|
||||
"mov v2.d[1], x4",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v17.16b",
|
||||
"mov v16.d[0], x4"
|
||||
]
|
||||
},
|
||||
"vpinsrq xmm0, xmm1, rax, 1": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x22 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v16.16b, v17.16b",
|
||||
"mov v16.d[1], x4"
|
||||
]
|
||||
},
|
||||
"vinserti128 ymm0, ymm1, xmm2, 0": {
|
||||
@ -3113,14 +3126,13 @@
|
||||
]
|
||||
},
|
||||
"vdpps xmm0, xmm1, xmm2, 00000000b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x40 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v16.16b, v2.16b"
|
||||
"movi v16.2d, #0x0"
|
||||
]
|
||||
},
|
||||
"vdpps xmm0, xmm1, xmm2, 00001111b": {
|
||||
@ -3141,19 +3153,18 @@
|
||||
"mov v2.s[0], v3.s[0]",
|
||||
"mov v2.s[1], v3.s[0]",
|
||||
"mov v2.s[2], v3.s[0]",
|
||||
"mov v2.s[3], v3.s[0]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"mov v16.s[3], v3.s[0]"
|
||||
]
|
||||
},
|
||||
"vdpps xmm0, xmm1, xmm2, 11110000b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x40 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v16.16b, v2.16b"
|
||||
"movi v16.2d, #0x0"
|
||||
]
|
||||
},
|
||||
"vdpps xmm0, xmm1, xmm2, 11111111b": {
|
||||
@ -3170,8 +3181,8 @@
|
||||
"mov v2.s[0], v3.s[0]",
|
||||
"mov v2.s[1], v3.s[0]",
|
||||
"mov v2.s[2], v3.s[0]",
|
||||
"mov v2.s[3], v3.s[0]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"mov v16.s[3], v3.s[0]"
|
||||
]
|
||||
},
|
||||
"vdpps ymm0, ymm1, ymm2, 00000000b": {
|
||||
@ -3335,14 +3346,13 @@
|
||||
]
|
||||
},
|
||||
"vdppd xmm0, xmm1, xmm2, 00000000b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x41 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v16.16b, v2.16b"
|
||||
"movi v16.2d, #0x0"
|
||||
]
|
||||
},
|
||||
"vdppd xmm0, xmm1, xmm2, 00001111b": {
|
||||
@ -3358,19 +3368,18 @@
|
||||
"mov v3.d[1], v2.d[0]",
|
||||
"faddp v3.2d, v3.2d, v2.2d",
|
||||
"mov v2.d[0], v3.d[0]",
|
||||
"mov v2.d[1], v3.d[0]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"mov v16.d[1], v3.d[0]"
|
||||
]
|
||||
},
|
||||
"vdppd xmm0, xmm1, xmm2, 11110000b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x41 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v16.16b, v2.16b"
|
||||
"movi v16.2d, #0x0"
|
||||
]
|
||||
},
|
||||
"vdppd xmm0, xmm1, xmm2, 11111111b": {
|
||||
@ -3384,12 +3393,12 @@
|
||||
"fmul v3.2d, v17.2d, v18.2d",
|
||||
"faddp v3.2d, v3.2d, v2.2d",
|
||||
"mov v2.d[0], v3.d[0]",
|
||||
"mov v2.d[1], v3.d[0]",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"mov v16.d[1], v3.d[0]"
|
||||
]
|
||||
},
|
||||
"vmpsadbw xmm0, xmm1, xmm2, 000b": {
|
||||
"ExpectedInstructionCount": 15,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x42 128-bit"
|
||||
@ -3408,12 +3417,11 @@
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"trn1 v4.4s, v3.4s, v2.4s",
|
||||
"trn2 v2.4s, v3.4s, v2.4s",
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"mov v16.16b, v2.16b"
|
||||
"addp v16.8h, v4.8h, v2.8h"
|
||||
]
|
||||
},
|
||||
"vmpsadbw xmm0, xmm1, xmm2, 001b": {
|
||||
"ExpectedInstructionCount": 15,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x42 128-bit"
|
||||
@ -3432,12 +3440,11 @@
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"trn1 v4.4s, v3.4s, v2.4s",
|
||||
"trn2 v2.4s, v3.4s, v2.4s",
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"mov v16.16b, v2.16b"
|
||||
"addp v16.8h, v4.8h, v2.8h"
|
||||
]
|
||||
},
|
||||
"vmpsadbw xmm0, xmm1, xmm2, 010b": {
|
||||
"ExpectedInstructionCount": 15,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x42 128-bit"
|
||||
@ -3456,12 +3463,11 @@
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"trn1 v4.4s, v3.4s, v2.4s",
|
||||
"trn2 v2.4s, v3.4s, v2.4s",
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"mov v16.16b, v2.16b"
|
||||
"addp v16.8h, v4.8h, v2.8h"
|
||||
]
|
||||
},
|
||||
"vmpsadbw xmm0, xmm1, xmm2, 011b": {
|
||||
"ExpectedInstructionCount": 15,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x42 128-bit"
|
||||
@ -3480,12 +3486,11 @@
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"trn1 v4.4s, v3.4s, v2.4s",
|
||||
"trn2 v2.4s, v3.4s, v2.4s",
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"mov v16.16b, v2.16b"
|
||||
"addp v16.8h, v4.8h, v2.8h"
|
||||
]
|
||||
},
|
||||
"vmpsadbw xmm0, xmm1, xmm2, 100b": {
|
||||
"ExpectedInstructionCount": 15,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x42 128-bit"
|
||||
@ -3504,12 +3509,11 @@
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"trn1 v4.4s, v3.4s, v2.4s",
|
||||
"trn2 v2.4s, v3.4s, v2.4s",
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"mov v16.16b, v2.16b"
|
||||
"addp v16.8h, v4.8h, v2.8h"
|
||||
]
|
||||
},
|
||||
"vmpsadbw xmm0, xmm1, xmm2, 101b": {
|
||||
"ExpectedInstructionCount": 15,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x42 128-bit"
|
||||
@ -3528,12 +3532,11 @@
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"trn1 v4.4s, v3.4s, v2.4s",
|
||||
"trn2 v2.4s, v3.4s, v2.4s",
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"mov v16.16b, v2.16b"
|
||||
"addp v16.8h, v4.8h, v2.8h"
|
||||
]
|
||||
},
|
||||
"vmpsadbw xmm0, xmm1, xmm2, 110b": {
|
||||
"ExpectedInstructionCount": 15,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x42 128-bit"
|
||||
@ -3552,12 +3555,11 @@
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"trn1 v4.4s, v3.4s, v2.4s",
|
||||
"trn2 v2.4s, v3.4s, v2.4s",
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"mov v16.16b, v2.16b"
|
||||
"addp v16.8h, v4.8h, v2.8h"
|
||||
]
|
||||
},
|
||||
"vmpsadbw xmm0, xmm1, xmm2, 111b": {
|
||||
"ExpectedInstructionCount": 15,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x42 128-bit"
|
||||
@ -3576,8 +3578,7 @@
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"trn1 v4.4s, v3.4s, v2.4s",
|
||||
"trn2 v2.4s, v3.4s, v2.4s",
|
||||
"addp v2.8h, v4.8h, v2.8h",
|
||||
"mov v16.16b, v2.16b"
|
||||
"addp v16.8h, v4.8h, v2.8h"
|
||||
]
|
||||
},
|
||||
"vmpsadbw ymm0, ymm1, ymm2, 000b": {
|
||||
@ -3925,49 +3926,45 @@
|
||||
]
|
||||
},
|
||||
"vpclmulqdq xmm0, xmm1, xmm2, 00000b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x44 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"unallocated (Unallocated)",
|
||||
"mov v16.16b, v2.16b"
|
||||
"unallocated (Unallocated)"
|
||||
]
|
||||
},
|
||||
"vpclmulqdq xmm0, xmm1, xmm2, 00001b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x44 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"dup v0.2d, v17.d[1]",
|
||||
"unallocated (Unallocated)",
|
||||
"mov v16.16b, v2.16b"
|
||||
"unallocated (Unallocated)"
|
||||
]
|
||||
},
|
||||
"vpclmulqdq xmm0, xmm1, xmm2, 10000b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x44 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"dup v0.2d, v18.d[1]",
|
||||
"unallocated (Unallocated)",
|
||||
"mov v16.16b, v2.16b"
|
||||
]
|
||||
},
|
||||
"vpclmulqdq xmm0, xmm1, xmm2, 10001b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x44 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"unallocated (Unallocated)",
|
||||
"mov v16.16b, v2.16b"
|
||||
"dup v0.2d, v18.d[1]",
|
||||
"unallocated (Unallocated)"
|
||||
]
|
||||
},
|
||||
"vpclmulqdq xmm0, xmm1, xmm2, 10001b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0x44 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"unallocated (Unallocated)"
|
||||
]
|
||||
},
|
||||
"vpclmulqdq ymm0, ymm1, ymm2, 00000b": {
|
||||
@ -4384,8 +4381,8 @@
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"sshr v2.4s, v19.4s, #31",
|
||||
"bsl v2.16b, v18.16b, v17.16b",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"bsl v16.16b, v18.16b, v17.16b"
|
||||
]
|
||||
},
|
||||
"vblendvps ymm0, ymm1, ymm2, ymm3": {
|
||||
@ -4410,8 +4407,8 @@
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"sshr v2.2d, v19.2d, #63",
|
||||
"bsl v2.16b, v18.16b, v17.16b",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"bsl v16.16b, v18.16b, v17.16b"
|
||||
]
|
||||
},
|
||||
"vblendvpd ymm0, ymm1, ymm2, ymm3": {
|
||||
@ -4436,8 +4433,8 @@
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"sshr v2.16b, v19.16b, #7",
|
||||
"bsl v2.16b, v18.16b, v17.16b",
|
||||
"mov v16.16b, v2.16b"
|
||||
"mov v16.16b, v2.16b",
|
||||
"bsl v16.16b, v18.16b, v17.16b"
|
||||
]
|
||||
},
|
||||
"vpblendvb ymm0, ymm1, ymm2, ymm3": {
|
||||
@ -4695,7 +4692,7 @@
|
||||
]
|
||||
},
|
||||
"vaeskeygenassist xmm0, xmm1, 0": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0xdf 128-bit"
|
||||
@ -4703,14 +4700,13 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #2000]",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v2.16b, v17.16b",
|
||||
"mov v16.16b, v17.16b",
|
||||
"unimplemented (Unimplemented)",
|
||||
"tbl v2.16b, {v2.16b}, v2.16b",
|
||||
"mov v16.16b, v2.16b"
|
||||
"tbl v16.16b, {v16.16b}, v2.16b"
|
||||
]
|
||||
},
|
||||
"vaeskeygenassist xmm0, xmm1, 0xFF": {
|
||||
"ExpectedInstructionCount": 9,
|
||||
"ExpectedInstructionCount": 8,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 3 0b01 0xdf 128-bit"
|
||||
@ -4718,13 +4714,12 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #2000]",
|
||||
"movi v3.2d, #0x0",
|
||||
"mov v2.16b, v17.16b",
|
||||
"mov v16.16b, v17.16b",
|
||||
"unimplemented (Unimplemented)",
|
||||
"tbl v2.16b, {v2.16b}, v2.16b",
|
||||
"tbl v16.16b, {v16.16b}, v2.16b",
|
||||
"mov x0, #0xff00000000",
|
||||
"dup v1.2d, x0",
|
||||
"eor v2.16b, v2.16b, v1.16b",
|
||||
"mov v16.16b, v2.16b"
|
||||
"eor v16.16b, v16.16b, v1.16b"
|
||||
]
|
||||
},
|
||||
"rorx eax, ebx, 0": {
|
||||
|
@ -18,25 +18,23 @@
|
||||
]
|
||||
},
|
||||
"vpsrlw xmm0, xmm1, 15": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 12 0b010 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ushr v2.8h, v17.8h, #15",
|
||||
"mov v16.16b, v2.16b"
|
||||
"ushr v16.8h, v17.8h, #15"
|
||||
]
|
||||
},
|
||||
"vpsrlw xmm0, xmm1, 16": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 12 0b010 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v16.16b, v2.16b"
|
||||
"movi v16.2d, #0x0"
|
||||
]
|
||||
},
|
||||
"vpsrlw ymm0, ymm1, 0": {
|
||||
@ -81,25 +79,23 @@
|
||||
]
|
||||
},
|
||||
"vpsraw xmm0, xmm1, 15": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 12 0b100 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"sshr v2.8h, v17.8h, #15",
|
||||
"mov v16.16b, v2.16b"
|
||||
"sshr v16.8h, v17.8h, #15"
|
||||
]
|
||||
},
|
||||
"vpsraw xmm0, xmm1, 16": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 12 0b100 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"sshr v2.8h, v17.8h, #15",
|
||||
"mov v16.16b, v2.16b"
|
||||
"sshr v16.8h, v17.8h, #15"
|
||||
]
|
||||
},
|
||||
"vpsraw ymm0, ymm1, 0": {
|
||||
@ -145,25 +141,23 @@
|
||||
]
|
||||
},
|
||||
"vpsllw xmm0, xmm1, 15": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 12 0b110 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"shl v2.8h, v17.8h, #15",
|
||||
"mov v16.16b, v2.16b"
|
||||
"shl v16.8h, v17.8h, #15"
|
||||
]
|
||||
},
|
||||
"vpsllw xmm0, xmm1, 16": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 12 0b110 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v16.16b, v2.16b"
|
||||
"movi v16.2d, #0x0"
|
||||
]
|
||||
},
|
||||
"vpsllw ymm0, ymm1, 0": {
|
||||
@ -208,25 +202,23 @@
|
||||
]
|
||||
},
|
||||
"vpsrld xmm0, xmm1, 31": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 13 0b010 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ushr v2.4s, v17.4s, #31",
|
||||
"mov v16.16b, v2.16b"
|
||||
"ushr v16.4s, v17.4s, #31"
|
||||
]
|
||||
},
|
||||
"vpsrld xmm0, xmm1, 32": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 13 0b010 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v16.16b, v2.16b"
|
||||
"movi v16.2d, #0x0"
|
||||
]
|
||||
},
|
||||
"vpsrld ymm0, ymm1, 0": {
|
||||
@ -271,25 +263,23 @@
|
||||
]
|
||||
},
|
||||
"vpsrad xmm0, xmm1, 31": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 13 0b100 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"sshr v2.4s, v17.4s, #31",
|
||||
"mov v16.16b, v2.16b"
|
||||
"sshr v16.4s, v17.4s, #31"
|
||||
]
|
||||
},
|
||||
"vpsrad xmm0, xmm1, 32": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 13 0b100 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"sshr v2.4s, v17.4s, #31",
|
||||
"mov v16.16b, v2.16b"
|
||||
"sshr v16.4s, v17.4s, #31"
|
||||
]
|
||||
},
|
||||
"vpsrad ymm0, ymm1, 0": {
|
||||
@ -335,25 +325,23 @@
|
||||
]
|
||||
},
|
||||
"vpslld xmm0, xmm1, 31": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 13 0b110 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"shl v2.4s, v17.4s, #31",
|
||||
"mov v16.16b, v2.16b"
|
||||
"shl v16.4s, v17.4s, #31"
|
||||
]
|
||||
},
|
||||
"vpslld xmm0, xmm1, 32": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 13 0b110 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v16.16b, v2.16b"
|
||||
"movi v16.2d, #0x0"
|
||||
]
|
||||
},
|
||||
"vpslld ymm0, ymm1, 0": {
|
||||
@ -398,25 +386,23 @@
|
||||
]
|
||||
},
|
||||
"vpsrlq xmm0, xmm1, 63": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 14 0b010 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ushr v2.2d, v17.2d, #63",
|
||||
"mov v16.16b, v2.16b"
|
||||
"ushr v16.2d, v17.2d, #63"
|
||||
]
|
||||
},
|
||||
"vpsrlq xmm0, xmm1, 64": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 14 0b010 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v16.16b, v2.16b"
|
||||
"movi v16.2d, #0x0"
|
||||
]
|
||||
},
|
||||
"vpsrlq ymm0, ymm1, 0": {
|
||||
@ -461,18 +447,6 @@
|
||||
]
|
||||
},
|
||||
"vpsrldq xmm0, xmm1, 15": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map group 14 0b011 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"ext v2.16b, v17.16b, v2.16b, #15",
|
||||
"mov v16.16b, v2.16b"
|
||||
]
|
||||
},
|
||||
"vpsrldq xmm0, xmm1, 16": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
@ -480,7 +454,17 @@
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v16.16b, v2.16b"
|
||||
"ext v16.16b, v17.16b, v2.16b, #15"
|
||||
]
|
||||
},
|
||||
"vpsrldq xmm0, xmm1, 16": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 14 0b011 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v16.2d, #0x0"
|
||||
]
|
||||
},
|
||||
"vpsrldq ymm0, ymm1, 0": {
|
||||
@ -532,25 +516,23 @@
|
||||
]
|
||||
},
|
||||
"vpsllq xmm0, xmm1, 63": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map group 14 0b110 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"shl v2.2d, v17.2d, #63",
|
||||
"mov v16.16b, v2.16b"
|
||||
"shl v16.2d, v17.2d, #63"
|
||||
]
|
||||
},
|
||||
"vpsllq xmm0, xmm1, 64": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map group 14 0b110 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v16.16b, v2.16b"
|
||||
"movi v16.2d, #0x0"
|
||||
]
|
||||
},
|
||||
"vpsllq ymm0, ymm1, 0": {
|
||||
@ -595,18 +577,6 @@
|
||||
]
|
||||
},
|
||||
"vpslldq xmm0, xmm1, 15": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map group 14 0b111 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"ext v2.16b, v2.16b, v17.16b, #1",
|
||||
"mov v16.16b, v2.16b"
|
||||
]
|
||||
},
|
||||
"vpslldq xmm0, xmm1, 16": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
@ -614,7 +584,17 @@
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"mov v16.16b, v2.16b"
|
||||
"ext v16.16b, v2.16b, v17.16b, #1"
|
||||
]
|
||||
},
|
||||
"vpslldq xmm0, xmm1, 16": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Map group 14 0b111 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v16.2d, #0x0"
|
||||
]
|
||||
},
|
||||
"vpslldq ymm0, ymm1, 0": {
|
||||
|
Loading…
Reference in New Issue
Block a user