mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-01-23 15:04:48 +00:00
Merge pull request #3980 from alyssarosenzweig/opt/avx
Optimize AVX load/store with ldp/stp
This commit is contained in:
commit
5ac7d5dfcd
@ -48,6 +48,31 @@ DEF_OP(LoadContext) {
|
||||
}
|
||||
}
|
||||
|
||||
// Lowers the LoadContextPair IR op to one `ldp` with an immediate offset.
// Both destinations are loaded from the context block addressed by STATE at
// Op->Offset; each element is IROp->Size bytes wide.
//  - GPRClass: destinations come from GetReg and are accessed as W (4 bytes)
//    or X (8 bytes).
//  - Any other class: destinations come from GetVReg and are accessed as
//    S (4 bytes), D (8 bytes) or Q (16 bytes).
// Any other size is reported through LOGMAN_MSG_A_FMT.
DEF_OP(LoadContextPair) {
  const auto Op = IROp->C<IR::IROp_LoadContextPair>();

  if (Op->Class == FEXCore::IR::GPRClass) {
    const auto Dst1 = GetReg(Op->OutValue1.ID());
    const auto Dst2 = GetReg(Op->OutValue2.ID());

    switch (IROp->Size) {
    case 4: ldp<ARMEmitter::IndexType::OFFSET>(Dst1.W(), Dst2.W(), STATE, Op->Offset); break;
    case 8: ldp<ARMEmitter::IndexType::OFFSET>(Dst1.X(), Dst2.X(), STATE, Op->Offset); break;
    // Name the op that is actually being lowered so the diagnostic is not
    // mistaken for a LoadMemPair failure.
    default: LOGMAN_MSG_A_FMT("Unhandled LoadContextPair size: {}", IROp->Size); break;
    }
  } else {
    const auto Dst1 = GetVReg(Op->OutValue1.ID());
    const auto Dst2 = GetVReg(Op->OutValue2.ID());

    switch (IROp->Size) {
    case 4: ldp<ARMEmitter::IndexType::OFFSET>(Dst1.S(), Dst2.S(), STATE, Op->Offset); break;
    case 8: ldp<ARMEmitter::IndexType::OFFSET>(Dst1.D(), Dst2.D(), STATE, Op->Offset); break;
    case 16: ldp<ARMEmitter::IndexType::OFFSET>(Dst1.Q(), Dst2.Q(), STATE, Op->Offset); break;
    default: LOGMAN_MSG_A_FMT("Unhandled LoadContextPair size: {}", IROp->Size); break;
    }
  }
}
|
||||
|
||||
DEF_OP(StoreContext) {
|
||||
const auto Op = IROp->C<IR::IROp_StoreContext>();
|
||||
const auto OpSize = IROp->Size;
|
||||
@ -80,6 +105,32 @@ DEF_OP(StoreContext) {
|
||||
}
|
||||
}
|
||||
|
||||
// Lowers the StoreContextPair IR op to one `stp` with an immediate offset.
// Value1 and Value2 are written to the context block addressed by STATE at
// Op->Offset; each element is IROp->Size bytes wide.
//  - GPRClass: sources come from GetZeroableReg and are stored as W (4 bytes)
//    or X (8 bytes).
//  - Any other class: sources come from GetVReg and are stored as S (4 bytes),
//    D (8 bytes) or Q (16 bytes).
// Any other size is reported through LOGMAN_MSG_A_FMT.
DEF_OP(StoreContextPair) {
  const auto Op = IROp->C<IR::IROp_StoreContextPair>();
  const auto OpSize = IROp->Size;

  if (Op->Class == FEXCore::IR::GPRClass) {
    auto Src1 = GetZeroableReg(Op->Value1);
    auto Src2 = GetZeroableReg(Op->Value2);

    switch (OpSize) {
    case 4: stp<ARMEmitter::IndexType::OFFSET>(Src1.W(), Src2.W(), STATE, Op->Offset); break;
    case 8: stp<ARMEmitter::IndexType::OFFSET>(Src1.X(), Src2.X(), STATE, Op->Offset); break;
    // Use the pair op's own name, matching the vector branch below.
    default: LOGMAN_MSG_A_FMT("Unhandled StoreContextPair size: {}", OpSize); break;
    }
  } else {
    const auto Src1 = GetVReg(Op->Value1.ID());
    const auto Src2 = GetVReg(Op->Value2.ID());

    switch (OpSize) {
    case 4: stp<ARMEmitter::IndexType::OFFSET>(Src1.S(), Src2.S(), STATE, Op->Offset); break;
    case 8: stp<ARMEmitter::IndexType::OFFSET>(Src1.D(), Src2.D(), STATE, Op->Offset); break;
    case 16: stp<ARMEmitter::IndexType::OFFSET>(Src1.Q(), Src2.Q(), STATE, Op->Offset); break;
    default: LOGMAN_MSG_A_FMT("Unhandled StoreContextPair size: {}", OpSize); break;
    }
  }
}
|
||||
|
||||
DEF_OP(LoadRegister) {
|
||||
const auto Op = IROp->C<IR::IROp_LoadRegister>();
|
||||
const auto OpSize = IROp->Size;
|
||||
@ -597,6 +648,32 @@ DEF_OP(LoadMem) {
|
||||
}
|
||||
}
|
||||
|
||||
// Lowers the LoadMemPair IR op to one `ldp` with an immediate offset, using
// the register holding Op->Addr as the base and Op->Offset as the immediate.
// GPR-class results use W/X views (4/8 bytes); every other class uses vector
// S/D/Q views (4/8/16 bytes). Unsupported sizes go to LOGMAN_MSG_A_FMT.
DEF_OP(LoadMemPair) {
  const auto Op = IROp->C<IR::IROp_LoadMemPair>();
  const auto Base = GetReg(Op->Addr.ID());
  const auto ElementSize = IROp->Size;

  if (Op->Class == FEXCore::IR::GPRClass) {
    // General-purpose destinations.
    const auto Lo = GetReg(Op->OutValue1.ID());
    const auto Hi = GetReg(Op->OutValue2.ID());

    if (ElementSize == 4) {
      ldp<ARMEmitter::IndexType::OFFSET>(Lo.W(), Hi.W(), Base, Op->Offset);
    } else if (ElementSize == 8) {
      ldp<ARMEmitter::IndexType::OFFSET>(Lo.X(), Hi.X(), Base, Op->Offset);
    } else {
      LOGMAN_MSG_A_FMT("Unhandled LoadMemPair size: {}", ElementSize);
    }
    return;
  }

  // Vector destinations.
  const auto Lo = GetVReg(Op->OutValue1.ID());
  const auto Hi = GetVReg(Op->OutValue2.ID());

  if (ElementSize == 4) {
    ldp<ARMEmitter::IndexType::OFFSET>(Lo.S(), Hi.S(), Base, Op->Offset);
  } else if (ElementSize == 8) {
    ldp<ARMEmitter::IndexType::OFFSET>(Lo.D(), Hi.D(), Base, Op->Offset);
  } else if (ElementSize == 16) {
    ldp<ARMEmitter::IndexType::OFFSET>(Lo.Q(), Hi.Q(), Base, Op->Offset);
  } else {
    LOGMAN_MSG_A_FMT("Unhandled LoadMemPair size: {}", ElementSize);
  }
}
|
||||
|
||||
DEF_OP(LoadMemTSO) {
|
||||
const auto Op = IROp->C<IR::IROp_LoadMemTSO>();
|
||||
const auto OpSize = IROp->Size;
|
||||
@ -1443,6 +1520,32 @@ DEF_OP(StoreMem) {
|
||||
}
|
||||
}
|
||||
|
||||
// Lowers the StoreMemPair IR op to one `stp` with an immediate offset, using
// the register holding Op->Addr as the base and Op->Offset as the immediate.
//  - GPRClass: sources come from GetReg and are stored as W (4 bytes) or
//    X (8 bytes).
//  - Any other class: sources come from GetVReg and are stored as S (4 bytes),
//    D (8 bytes) or Q (16 bytes).
// Any other size is reported through LOGMAN_MSG_A_FMT.
DEF_OP(StoreMemPair) {
  const auto Op = IROp->C<IR::IROp_StoreMemPair>();
  const auto OpSize = IROp->Size;
  const auto Addr = GetReg(Op->Addr.ID());

  if (Op->Class == FEXCore::IR::GPRClass) {
    const auto Src1 = GetReg(Op->Value1.ID());
    const auto Src2 = GetReg(Op->Value2.ID());

    switch (OpSize) {
    case 4: stp<ARMEmitter::IndexType::OFFSET>(Src1.W(), Src2.W(), Addr, Op->Offset); break;
    case 8: stp<ARMEmitter::IndexType::OFFSET>(Src1.X(), Src2.X(), Addr, Op->Offset); break;
    // Use the pair op's own name, matching the vector branch below.
    default: LOGMAN_MSG_A_FMT("Unhandled StoreMemPair size: {}", OpSize); break;
    }
  } else {
    const auto Src1 = GetVReg(Op->Value1.ID());
    const auto Src2 = GetVReg(Op->Value2.ID());

    switch (OpSize) {
    case 4: stp<ARMEmitter::IndexType::OFFSET>(Src1.S(), Src2.S(), Addr, Op->Offset); break;
    case 8: stp<ARMEmitter::IndexType::OFFSET>(Src1.D(), Src2.D(), Addr, Op->Offset); break;
    case 16: stp<ARMEmitter::IndexType::OFFSET>(Src1.Q(), Src2.Q(), Addr, Op->Offset); break;
    default: LOGMAN_MSG_A_FMT("Unhandled StoreMemPair size: {}", OpSize); break;
    }
  }
}
|
||||
|
||||
DEF_OP(StoreMemTSO) {
|
||||
const auto Op = IROp->C<IR::IROp_StoreMemTSO>();
|
||||
const auto OpSize = IROp->Size;
|
||||
|
@ -20,6 +20,7 @@ namespace FEXCore::CPU {
|
||||
|
||||
// Handlers for the Allocate* pseudo-ops. Every body is empty, so these
// handlers themselves emit nothing.
DEF_OP(AllocateGPR) {}
|
||||
DEF_OP(AllocateGPRAfter) {}
|
||||
DEF_OP(AllocateFPR) {}
|
||||
|
||||
DEF_OP(GuestOpcode) {
|
||||
auto Op = IROp->C<IR::IROp_GuestOpcode>();
|
||||
|
@ -1268,7 +1268,23 @@ public:
|
||||
} else {
|
||||
bool Partial = RegCache.Partial & (1ull << Index);
|
||||
unsigned Size = Partial ? 8 : CacheIndexToSize(Index);
|
||||
_StoreContext(Size, CacheIndexClass(Index), Value, CacheIndexToContextOffset(Index));
|
||||
uint64_t NextBit = (1ull << (Index - 1));
|
||||
uint32_t Offset = CacheIndexToContextOffset(Index);
|
||||
auto Class = CacheIndexClass(Index);
|
||||
|
||||
// Use stp where possible to store multiple values at a time. This accelerates AVX.
|
||||
// TODO: this is all really confusing because of backwards iteration,
|
||||
// can we peel back that hack?
|
||||
if ((Bits & NextBit) && !Partial && Size >= 4 && CacheIndexToContextOffset(Index - 1) == Offset - Size && (Offset - Size) / Size < 64) {
|
||||
LOGMAN_THROW_A_FMT(CacheIndexClass(Index - 1) == Class, "construction");
|
||||
LOGMAN_THROW_A_FMT((Offset % Size) == 0, "construction");
|
||||
Ref ValueNext = RegCache.Value[Index - 1];
|
||||
|
||||
_StoreContextPair(Size, Class, ValueNext, Value, Offset - Size);
|
||||
Bits &= ~NextBit;
|
||||
} else {
|
||||
_StoreContext(Size, Class, Value, Offset);
|
||||
}
|
||||
}
|
||||
|
||||
Bits &= ~(1ull << Index);
|
||||
@ -1901,6 +1917,43 @@ private:
|
||||
return RegCache.Value[Index];
|
||||
}
|
||||
|
||||
// Allocates two fresh destination values of the requested register class and
// returns them as a pair. FPR allocations pass Size as both arguments of
// _AllocateFPR; every other class gets two _AllocateGPR(false) values.
RefPair AllocatePair(FEXCore::IR::RegisterClassType Class, uint8_t Size) {
  if (Class != FPRClass) {
    return {_AllocateGPR(false), _AllocateGPR(false)};
  }

  return {_AllocateFPR(Size, Size), _AllocateFPR(Size, Size)};
}
|
||||
|
||||
// Emits a LoadContextPair for two Size-byte values starting at context offset
// Offset, without consulting or updating RegCache. The destinations are
// allocated here and returned to the caller.
RefPair LoadContextPair_Uncached(FEXCore::IR::RegisterClassType Class, uint8_t Size, unsigned Offset) {
  RefPair Dest = AllocatePair(Class, Size);

  _LoadContextPair(Size, Class, Offset, Dest.Low, Dest.High);

  return Dest;
}
|
||||
|
||||
// Loads the cache entries Index and Index + 1 as a pair.
//
// When neither entry is currently marked Cached or Partial and Offset / Size
// is below 64, both are fetched with one paired context load and recorded in
// RegCache. Otherwise each entry goes through LoadRegCache individually, the
// second one at Offset + Size.
RefPair LoadRegCachePair(uint64_t Offset, uint8_t Index, RegisterClassType RegClass, uint8_t Size) {
  LOGMAN_THROW_AA_FMT(Index != DFIndex, "must be pairable");

  // Mask covering both cache slots of the pair.
  const uint64_t PairMask = 3ull << static_cast<uint64_t>(Index);
  const bool NeitherTracked = ((RegCache.Partial | RegCache.Cached) & PairMask) == 0;

  if (!(NeitherTracked && ((Offset / Size) < 64))) {
    // Fallback on a pair of loads
    return {
      .Low = LoadRegCache(Offset, Index, RegClass, Size),
      .High = LoadRegCache(Offset + Size, Index + 1, RegClass, Size),
    };
  }

  // Try to load a pair into the cache
  auto Loaded = LoadContextPair_Uncached(RegClass, Size, Offset);

  RegCache.Value[Index] = Loaded.Low;
  RegCache.Value[Index + 1] = Loaded.High;
  RegCache.Cached |= PairMask;

  // 8-byte loads are additionally flagged as partial entries.
  if (Size == 8) {
    RegCache.Partial |= PairMask;
  }

  return Loaded;
}
|
||||
|
||||
// Fetches guest GPR number Reg through the register cache, using the
// context's GPR size. Reg is passed as the offset argument and
// GPR0Index + Reg as the cache index.
Ref LoadGPR(uint8_t Reg) {
  const auto GPRSize = CTX->GetGPRSize();

  return LoadRegCache(Reg, GPR0Index + Reg, GPRClass, GPRSize);
}
|
||||
@ -1909,6 +1962,10 @@ private:
|
||||
return LoadRegCache(CacheIndexToContextOffset(Index), Index, CacheIndexClass(Index), Size);
|
||||
}
|
||||
|
||||
// Loads the cache entries Index and Index + 1 as a pair of Size-byte values,
// deriving the context offset and register class from Index.
RefPair LoadContextPair(uint8_t Size, uint8_t Index) {
  const auto CtxOffset = CacheIndexToContextOffset(Index);
  const auto RegClass = CacheIndexClass(Index);

  return LoadRegCachePair(CtxOffset, Index, RegClass, Size);
}
|
||||
|
||||
// Convenience overload: loads cache entry Index at the size reported by
// CacheIndexToSize for that entry.
Ref LoadContext(uint8_t Index) {
  const auto EntrySize = CacheIndexToSize(Index);

  return LoadContext(EntrySize, Index);
}
|
||||
@ -2342,7 +2399,7 @@ private:
|
||||
IROp_IRHeader* CurrentHeader {};
|
||||
|
||||
Ref _StoreMemAutoTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, Ref Addr, Ref Value, uint8_t Align = 1) {
|
||||
if (CTX->IsAtomicTSOEnabled()) {
|
||||
if (Class == FPRClass ? CTX->IsVectorAtomicTSOEnabled() : CTX->IsAtomicTSOEnabled()) {
|
||||
return _StoreMemTSO(Class, Size, Value, Addr, Invalid(), Align, MEM_OFFSET_SXTX, 1);
|
||||
} else {
|
||||
return _StoreMem(Class, Size, Value, Addr, Invalid(), Align, MEM_OFFSET_SXTX, 1);
|
||||
@ -2350,7 +2407,7 @@ private:
|
||||
}
|
||||
|
||||
Ref _LoadMemAutoTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, Ref ssa0, uint8_t Align = 1) {
|
||||
if (CTX->IsAtomicTSOEnabled()) {
|
||||
if (Class == FPRClass ? CTX->IsVectorAtomicTSOEnabled() : CTX->IsAtomicTSOEnabled()) {
|
||||
return _LoadMemTSO(Class, Size, ssa0, Invalid(), Align, MEM_OFFSET_SXTX, 1);
|
||||
} else {
|
||||
return _LoadMem(Class, Size, ssa0, Invalid(), Align, MEM_OFFSET_SXTX, 1);
|
||||
@ -2368,6 +2425,44 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
// Splits address mode A into a base value plus an immediate offset usable by
// a paired load/store of Size-byte elements.
//
// The displacement is kept as an immediate only when it is a multiple of Size
// and, measured in elements, lies in [-64, 64). In that case it is removed
// from A before the base is computed; otherwise it stays in A and the
// returned Offset is left value-initialised. The base always comes from
// LoadEffectiveAddress(A, true, false).
//
// NOTE(review): the scaled offset is held in a plain `signed`; confirm that
// A.Offset / Size cannot exceed that range for any caller.
AddressMode SelectPairAddressMode(AddressMode A, uint8_t Size) {
  AddressMode Result {};

  const signed ScaledOffset = A.Offset / Size;
  const bool ElementAligned = (A.Offset % Size) == 0;
  const bool FitsImmediate = ScaledOffset >= -64 && ScaledOffset < 64;

  if (ElementAligned && FitsImmediate) {
    Result.Offset = A.Offset;
    A.Offset = 0;
  }

  Result.Base = LoadEffectiveAddress(A, true, false);
  return Result;
}
|
||||
|
||||
|
||||
// Emits a LoadMemPair reading two Size-byte values of class Class from
// Base + Offset. The destinations are allocated here and returned.
RefPair LoadMemPair(FEXCore::IR::RegisterClassType Class, uint8_t Size, Ref Base, unsigned Offset) {
  RefPair Dest = AllocatePair(Class, Size);

  _LoadMemPair(Class, Size, Base, Offset, Dest.Low, Dest.High);

  return Dest;
}
|
||||
|
||||
// Loads two consecutive Size-byte values of class Class from the address
// described by A and returns them as {Low, High}.
//
// A single paired load is used when atomic TSO does not apply to this access
// (TSO disabled in the context, or A.NonTSO set), A has no segment, and Size
// is between 4 and 16 bytes. Otherwise two separate _LoadMemAutoTSO loads are
// issued with the given Align, the second one Size bytes after the first.
RefPair _LoadMemPairAutoTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, AddressMode A, uint8_t Align = 1) {
  const bool AtomicTSO = CTX->IsAtomicTSOEnabled() && !A.NonTSO;

  // Use ldp if possible, otherwise fallback on two loads.
  // (Logical &&, not bitwise &: the result was the same, but the intent is a
  // boolean conjunction.)
  if (!AtomicTSO && !A.Segment && Size >= 4 && Size <= 16) {
    A = SelectPairAddressMode(A, Size);
    return LoadMemPair(Class, Size, A.Base, A.Offset);
  }

  // The high element follows the low one by exactly one element, so step by
  // Size rather than a hard-coded 16 (which was only right for 16-byte loads).
  AddressMode HighA = A;
  HighA.Offset += Size;

  return {
    .Low = _LoadMemAutoTSO(Class, Size, A, Align),
    .High = _LoadMemAutoTSO(Class, Size, HighA, Align),
  };
}
|
||||
|
||||
Ref _StoreMemAutoTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, AddressMode A, Ref Value, uint8_t Align = 1) {
|
||||
bool AtomicTSO = CTX->IsAtomicTSOEnabled() && !A.NonTSO;
|
||||
A = SelectAddressMode(A, AtomicTSO, Class != GPRClass, Size);
|
||||
@ -2379,6 +2474,20 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
// Stores Value1 followed by Value2, each Size bytes of class Class, at the
// address described by A.
//
// A single paired store is used when atomic TSO does not apply to this access
// (TSO disabled in the context, or A.NonTSO set), A has no segment, and Size
// is between 4 and 16 bytes. Otherwise two separate _StoreMemAutoTSO stores
// are issued, the second one Size bytes after the first.
void _StoreMemPairAutoTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, AddressMode A, Ref Value1, Ref Value2, uint8_t Align = 1) {
  const bool AtomicTSO = CTX->IsAtomicTSOEnabled() && !A.NonTSO;

  // Use stp if possible, otherwise fallback on two stores.
  // (Logical &&, not bitwise &: the result was the same, but the intent is a
  // boolean conjunction.)
  if (!AtomicTSO && !A.Segment && Size >= 4 && Size <= 16) {
    A = SelectPairAddressMode(A, Size);
    _StoreMemPair(Class, Size, Value1, Value2, A.Base, A.Offset);
    return;
  }

  // Forward the caller's alignment instead of a literal 1, as the paired-load
  // fallback does. The default of 1 keeps existing callers unchanged.
  _StoreMemAutoTSO(Class, Size, A, Value1, Align);
  A.Offset += Size;
  _StoreMemAutoTSO(Class, Size, A, Value2, Align);
}
|
||||
|
||||
// Emits a prefetch of the address held in ssa0. The offset operand is
// Invalid(), the offset type is MEM_OFFSET_SXTX and the offset scale is 1.
Ref Prefetch(bool ForStore, bool Stream, uint8_t CacheLevel, Ref ssa0) {
  constexpr auto OffsetScale = 1;

  return _Prefetch(ForStore, Stream, CacheLevel, ssa0, Invalid(), MEM_OFFSET_SXTX, OffsetScale);
}
|
||||
|
@ -508,10 +508,11 @@ OpDispatchBuilder::RefPair OpDispatchBuilder::AVX128_LoadSource_WithOpSize(
|
||||
LOGMAN_THROW_AA_FMT(!IsVSIB, "VSIB uses LoadVSIB instead");
|
||||
}
|
||||
|
||||
return {
|
||||
.Low = _LoadMemAutoTSO(FPRClass, 16, A, 1),
|
||||
.High = NeedsHigh ? _LoadMemAutoTSO(FPRClass, 16, HighA, 1) : nullptr,
|
||||
};
|
||||
if (NeedsHigh) {
|
||||
return _LoadMemPairAutoTSO(FPRClass, 16, A, 1);
|
||||
} else {
|
||||
return {.Low = _LoadMemAutoTSO(FPRClass, 16, A, 1)};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -557,13 +558,10 @@ void OpDispatchBuilder::AVX128_StoreResult_WithOpSize(FEXCore::X86Tables::Decode
|
||||
} else {
|
||||
AddressMode A = DecodeAddress(Op, Operand, AccessType, false /* IsLoad */);
|
||||
|
||||
_StoreMemAutoTSO(FPRClass, 16, A, Src.Low, 1);
|
||||
|
||||
if (Src.High) {
|
||||
AddressMode HighA = A;
|
||||
HighA.Offset += 16;
|
||||
|
||||
_StoreMemAutoTSO(FPRClass, 16, HighA, Src.High, 1);
|
||||
_StoreMemPairAutoTSO(FPRClass, 16, A, Src.Low, Src.High, 1);
|
||||
} else {
|
||||
_StoreMemAutoTSO(FPRClass, 16, A, Src.Low, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2173,18 +2171,20 @@ void OpDispatchBuilder::AVX128_VectorVariableBlend(OpcodeArgs) {
|
||||
void OpDispatchBuilder::AVX128_SaveAVXState(Ref MemBase) {
|
||||
const auto NumRegs = CTX->Config.Is64BitMode ? 16U : 8U;
|
||||
|
||||
for (uint32_t i = 0; i < NumRegs; ++i) {
|
||||
Ref Upper = AVX128_LoadXMMRegister(i, true);
|
||||
_StoreMem(FPRClass, 16, Upper, MemBase, _Constant(i * 16 + 576), 16, MEM_OFFSET_SXTX, 1);
|
||||
for (uint32_t i = 0; i < NumRegs; i += 2) {
|
||||
RefPair Pair = LoadContextPair(16, AVXHigh0Index + i);
|
||||
_StoreMemPair(FPRClass, 16, Pair.Low, Pair.High, MemBase, i * 16 + 576);
|
||||
}
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::AVX128_RestoreAVXState(Ref MemBase) {
|
||||
const auto NumRegs = CTX->Config.Is64BitMode ? 16U : 8U;
|
||||
|
||||
for (uint32_t i = 0; i < NumRegs; ++i) {
|
||||
Ref YMMHReg = _LoadMem(FPRClass, 16, MemBase, _Constant(i * 16 + 576), 16, MEM_OFFSET_SXTX, 1);
|
||||
AVX128_StoreXMMRegister(i, YMMHReg, true);
|
||||
for (uint32_t i = 0; i < NumRegs; i += 2) {
|
||||
auto YMMHRegs = LoadMemPair(FPRClass, 16, MemBase, i * 16 + 576);
|
||||
|
||||
AVX128_StoreXMMRegister(i, YMMHRegs.Low, true);
|
||||
AVX128_StoreXMMRegister(i + 1, YMMHRegs.High, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2736,37 +2736,33 @@ void OpDispatchBuilder::SaveX87State(OpcodeArgs, Ref MemBase) {
|
||||
// MXCSR_MASK: Mask for writes to the MXCSR register
|
||||
// If OSFXSR bit in CR4 is not set then FXSAVE /may/ not save the XMM registers
|
||||
// This is implementation dependent
|
||||
for (uint32_t i = 0; i < Core::CPUState::NUM_MMS; ++i) {
|
||||
Ref MMReg = LoadContext(MM0Index + i);
|
||||
|
||||
_StoreMem(FPRClass, 16, MMReg, MemBase, _Constant(i * 16 + 32), 16, MEM_OFFSET_SXTX, 1);
|
||||
for (uint32_t i = 0; i < Core::CPUState::NUM_MMS; i += 2) {
|
||||
RefPair MMRegs = LoadContextPair(16, MM0Index + i);
|
||||
_StoreMemPair(FPRClass, 16, MMRegs.Low, MMRegs.High, MemBase, i * 16 + 32);
|
||||
}
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::SaveSSEState(Ref MemBase) {
|
||||
const auto NumRegs = CTX->Config.Is64BitMode ? 16U : 8U;
|
||||
|
||||
for (uint32_t i = 0; i < NumRegs; ++i) {
|
||||
Ref XMMReg = LoadXMMRegister(i);
|
||||
|
||||
_StoreMem(FPRClass, 16, XMMReg, MemBase, _Constant(i * 16 + 160), 16, MEM_OFFSET_SXTX, 1);
|
||||
for (uint32_t i = 0; i < NumRegs; i += 2) {
|
||||
_StoreMemPair(FPRClass, 16, LoadXMMRegister(i), LoadXMMRegister(i + 1), MemBase, i * 16 + 160);
|
||||
}
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::SaveMXCSRState(Ref MemBase) {
|
||||
_StoreMem(GPRClass, 4, GetMXCSR(), MemBase, _Constant(24), 4, MEM_OFFSET_SXTX, 1);
|
||||
|
||||
// Store the mask for all bits.
|
||||
_StoreMem(GPRClass, 4, _Constant(0xFFFF), MemBase, _Constant(28), 4, MEM_OFFSET_SXTX, 1);
|
||||
// Store MXCSR and the mask for all bits.
|
||||
_StoreMemPair(GPRClass, 4, GetMXCSR(), _Constant(0xFFFF), MemBase, 24);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::SaveAVXState(Ref MemBase) {
|
||||
const auto NumRegs = CTX->Config.Is64BitMode ? 16U : 8U;
|
||||
|
||||
for (uint32_t i = 0; i < NumRegs; ++i) {
|
||||
Ref Upper = _VDupElement(32, 16, LoadXMMRegister(i), 1);
|
||||
for (uint32_t i = 0; i < NumRegs; i += 2) {
|
||||
Ref Upper0 = _VDupElement(32, 16, LoadXMMRegister(i + 0), 1);
|
||||
Ref Upper1 = _VDupElement(32, 16, LoadXMMRegister(i + 1), 1);
|
||||
|
||||
_StoreMem(FPRClass, 16, Upper, MemBase, _Constant(i * 16 + 576), 16, MEM_OFFSET_SXTX, 1);
|
||||
_StoreMemPair(FPRClass, 16, Upper0, Upper1, MemBase, i * 16 + 576);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2868,18 +2864,22 @@ void OpDispatchBuilder::RestoreX87State(Ref MemBase) {
|
||||
StoreContext(AbridgedFTWIndex, _LoadMem(GPRClass, 1, MemBase, _Constant(4), 2, MEM_OFFSET_SXTX, 1));
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < Core::CPUState::NUM_MMS; ++i) {
|
||||
auto MMReg = _LoadMem(FPRClass, 16, MemBase, _Constant(i * 16 + 32), 16, MEM_OFFSET_SXTX, 1);
|
||||
StoreContext(MM0Index + i, MMReg);
|
||||
for (uint32_t i = 0; i < Core::CPUState::NUM_MMS; i += 2) {
|
||||
auto MMRegs = LoadMemPair(FPRClass, 16, MemBase, i * 16 + 32);
|
||||
|
||||
StoreContext(MM0Index + i, MMRegs.Low);
|
||||
StoreContext(MM0Index + i + 1, MMRegs.High);
|
||||
}
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::RestoreSSEState(Ref MemBase) {
|
||||
const auto NumRegs = CTX->Config.Is64BitMode ? 16U : 8U;
|
||||
|
||||
for (uint32_t i = 0; i < NumRegs; ++i) {
|
||||
Ref XMMReg = _LoadMem(FPRClass, 16, MemBase, _Constant(i * 16 + 160), 16, MEM_OFFSET_SXTX, 1);
|
||||
StoreXMMRegister(i, XMMReg);
|
||||
for (uint32_t i = 0; i < NumRegs; i += 2) {
|
||||
auto XMMRegs = LoadMemPair(FPRClass, 16, MemBase, i * 16 + 160);
|
||||
|
||||
StoreXMMRegister(i, XMMRegs.Low);
|
||||
StoreXMMRegister(i + 1, XMMRegs.High);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2896,11 +2896,12 @@ void OpDispatchBuilder::RestoreMXCSRState(Ref MXCSR) {
|
||||
void OpDispatchBuilder::RestoreAVXState(Ref MemBase) {
|
||||
const auto NumRegs = CTX->Config.Is64BitMode ? 16U : 8U;
|
||||
|
||||
for (uint32_t i = 0; i < NumRegs; ++i) {
|
||||
Ref XMMReg = LoadXMMRegister(i);
|
||||
Ref YMMHReg = _LoadMem(FPRClass, 16, MemBase, _Constant(i * 16 + 576), 16, MEM_OFFSET_SXTX, 1);
|
||||
Ref YMM = _VInsElement(32, 16, 1, 0, XMMReg, YMMHReg);
|
||||
StoreXMMRegister(i, YMM);
|
||||
for (uint32_t i = 0; i < NumRegs; i += 2) {
|
||||
Ref XMMReg0 = LoadXMMRegister(i + 0);
|
||||
Ref XMMReg1 = LoadXMMRegister(i + 1);
|
||||
auto YMMHRegs = LoadMemPair(FPRClass, 16, MemBase, i * 16 + 576);
|
||||
StoreXMMRegister(i + 0, _VInsElement(32, 16, 1, 0, XMMReg0, YMMHRegs.Low));
|
||||
StoreXMMRegister(i + 1, _VInsElement(32, 16, 1, 0, XMMReg1, YMMHRegs.High));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -253,6 +253,11 @@
|
||||
"If ForPair is set, RA will try to allocate the base of a register pair"],
|
||||
"DestSize": "8"
|
||||
},
|
||||
"FPR = AllocateFPR u8:#RegisterSize, u8:#ElementSize": {
|
||||
"Desc": ["Like AllocateGPR, but for FPR"],
|
||||
"DestSize": "RegisterSize",
|
||||
"NumElements": "RegisterSize / ElementSize"
|
||||
},
|
||||
"GPR = AllocateGPRAfter GPR:$After": {
|
||||
"Desc": ["Silly pseudo-instruction to allocate a register for a future destination",
|
||||
"This is a kludge to deal with the IR's lack of multiple destinations",
|
||||
@ -386,6 +391,20 @@
|
||||
]
|
||||
},
|
||||
|
||||
"SSA:$Value1, SSA:$Value2 = LoadContextPair u8:#ByteSize, RegisterClass:$Class, u32:$Offset": {
|
||||
"Desc": ["Loads a pair of values from the context with offset",
|
||||
"Value1 = Ctx[Offset], Value2 = Ctx[Offset + ByteSize]"
|
||||
],
|
||||
"HasSideEffects": true,
|
||||
"DestSize": "ByteSize",
|
||||
"EmitValidation": [
|
||||
"($Class == GPRClass && (#ByteSize == 1 || #ByteSize == 2 || #ByteSize == 4 || #ByteSize == 8)) || $Class == FPRClass",
|
||||
"($Class == FPRClass && (#ByteSize == 1 || #ByteSize == 2 || #ByteSize == 4 || #ByteSize == 8 || #ByteSize == 16 || #ByteSize == 32)) || $Class == GPRClass",
|
||||
"!($Offset >= offsetof(Core::CPUState, gregs[0]) && $Offset < offsetof(Core::CPUState, gregs[16])) && \"Can't LoadContext to GPR\"",
|
||||
"!($Offset >= offsetof(Core::CPUState, xmm.avx.data[0]) && $Offset < offsetof(Core::CPUState, xmm.avx.data[16])) && \"Can't LoadContext to XMM\""
|
||||
]
|
||||
},
|
||||
|
||||
"StoreContext u8:#ByteSize, RegisterClass:$Class, SSA:$Value, u32:$Offset": {
|
||||
"Desc": ["Stores a value to the context with offset",
|
||||
"Ctx[Offset] = Value",
|
||||
@ -403,6 +422,24 @@
|
||||
]
|
||||
},
|
||||
|
||||
"StoreContextPair u8:#ByteSize, RegisterClass:$Class, SSA:$Value1, SSA:$Value2, u32:$Offset": {
|
||||
"Desc": ["Stores a pair of values to the context with offset",
|
||||
"Ctx[Offset] = Value1, Ctx[Offset + ByteSize] = Value2",
|
||||
"Zero Extends if value's type is too small",
|
||||
"Truncates if value's type is too large"
|
||||
],
|
||||
"HasSideEffects": true,
|
||||
"DestSize": "ByteSize",
|
||||
"EmitValidation": [
|
||||
"WalkFindRegClass($Value1) == $Class",
|
||||
"WalkFindRegClass($Value2) == $Class",
|
||||
"($Class == GPRClass && (#ByteSize == 1 || #ByteSize == 2 || #ByteSize == 4 || #ByteSize == 8)) || $Class == FPRClass",
|
||||
"($Class == FPRClass && (#ByteSize == 1 || #ByteSize == 2 || #ByteSize == 4 || #ByteSize == 8 || #ByteSize == 16 || #ByteSize == 32)) || $Class == GPRClass",
|
||||
"!($Offset >= offsetof(Core::CPUState, gregs[0]) && $Offset < offsetof(Core::CPUState, gregs[16])) && \"Can't StoreContext to GPR\"",
|
||||
"!($Offset >= offsetof(Core::CPUState, xmm.avx.data[0]) && $Offset < offsetof(Core::CPUState, xmm.avx.data[16])) && \"Can't StoreContext to XMM\""
|
||||
]
|
||||
},
|
||||
|
||||
"SSA = LoadContextIndexed GPR:$Index, u8:#ByteSize, u32:$BaseOffset, u32:$Stride, RegisterClass:$Class": {
|
||||
"Desc": ["Loads a value from the context with offset and indexed by SSA value",
|
||||
"Dest = Ctx[BaseOffset + Index * Stride]"
|
||||
@ -476,6 +513,12 @@
|
||||
"DestSize": "Size"
|
||||
},
|
||||
|
||||
"SSA:$Value1, SSA:$Value2 = LoadMemPair RegisterClass:$Class, u8:#Size, GPR:$Addr, u32:$Offset": {
|
||||
"Desc": ["Load a pair of values from memory."],
|
||||
"DestSize": "Size",
|
||||
"HasSideEffects": true
|
||||
},
|
||||
|
||||
"StoreMem RegisterClass:$Class, u8:#Size, SSA:$Value, GPR:$Addr, GPR:$Offset, u8:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
|
||||
"Desc": [ "Stores a value to memory.",
|
||||
"Zero Extends if value's type is too small",
|
||||
@ -488,6 +531,19 @@
|
||||
]
|
||||
},
|
||||
|
||||
"StoreMemPair RegisterClass:$Class, u8:#Size, SSA:$Value1, SSA:$Value2, GPR:$Addr, u32:$Offset": {
|
||||
"Desc": [ "Stores a pair of values to memory.",
|
||||
"Zero Extends if value's type is too small",
|
||||
"Truncates if value's type is too large"
|
||||
],
|
||||
"HasSideEffects": true,
|
||||
"DestSize": "Size",
|
||||
"EmitValidation": [
|
||||
"WalkFindRegClass($Value1) == $Class",
|
||||
"WalkFindRegClass($Value2) == $Class"
|
||||
]
|
||||
},
|
||||
|
||||
"SSA = LoadMemTSO RegisterClass:$Class, u8:#Size, GPR:$Addr, GPR:$Offset, u8:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
|
||||
"Desc": ["Does a x86 TSO compatible load from memory. Offset must be Invalid()."
|
||||
],
|
||||
|
@ -46,13 +46,12 @@
|
||||
]
|
||||
},
|
||||
"vmovups ymm0, [rax]": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Comment": [
|
||||
"Map 1 0b00 0x10 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q16, [x4]",
|
||||
"ldr q2, [x4, #16]",
|
||||
"ldp q16, q2, [x4]",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
@ -89,13 +88,12 @@
|
||||
]
|
||||
},
|
||||
"vmovupd ymm0, [rax]": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Comment": [
|
||||
"Map 1 0b01 0x10 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q16, [x4]",
|
||||
"ldr q2, [x4, #16]",
|
||||
"ldp q16, q2, [x4]",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
@ -156,14 +154,13 @@
|
||||
]
|
||||
},
|
||||
"vmovups [rax], ymm0": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Comment": [
|
||||
"Map 1 0b00 0x11 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #16]",
|
||||
"str q16, [x4]",
|
||||
"str q2, [x4, #16]"
|
||||
"stp q16, q2, [x4]"
|
||||
]
|
||||
},
|
||||
"vmovupd [rax], xmm0": {
|
||||
@ -176,14 +173,13 @@
|
||||
]
|
||||
},
|
||||
"vmovupd [rax], ymm0": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Comment": [
|
||||
"Map 1 0b01 0x11 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #16]",
|
||||
"str q16, [x4]",
|
||||
"str q2, [x4, #16]"
|
||||
"stp q16, q2, [x4]"
|
||||
]
|
||||
},
|
||||
"vmovss [rax], xmm0": {
|
||||
@ -272,13 +268,12 @@
|
||||
]
|
||||
},
|
||||
"vmovsldup ymm0, [rax]": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 1 0b10 0x12 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x4]",
|
||||
"ldr q3, [x4, #16]",
|
||||
"ldp q2, q3, [x4]",
|
||||
"trn1 v16.4s, v2.4s, v2.4s",
|
||||
"trn1 v2.4s, v3.4s, v3.4s",
|
||||
"str q2, [x28, #16]"
|
||||
@ -297,13 +292,12 @@
|
||||
]
|
||||
},
|
||||
"vmovddup ymm0, [rax]": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 1 0b11 0x12 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x4]",
|
||||
"ldr q3, [x4, #16]",
|
||||
"ldp q2, q3, [x4]",
|
||||
"dup v16.2d, v2.d[0]",
|
||||
"dup v2.2d, v3.d[0]",
|
||||
"str q2, [x28, #16]"
|
||||
@ -340,14 +334,13 @@
|
||||
]
|
||||
},
|
||||
"vunpcklps ymm0, ymm1, [rax]": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Comment": [
|
||||
"Map 1 0b00 0x14 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #32]",
|
||||
"ldr q3, [x4]",
|
||||
"ldr q4, [x4, #16]",
|
||||
"ldp q3, q4, [x4]",
|
||||
"zip1 v16.4s, v17.4s, v3.4s",
|
||||
"zip1 v2.4s, v2.4s, v4.4s",
|
||||
"str q2, [x28, #16]"
|
||||
@ -366,14 +359,13 @@
|
||||
]
|
||||
},
|
||||
"vunpcklpd ymm0, ymm1, [rax]": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Comment": [
|
||||
"Map 1 0b01 0x14 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #32]",
|
||||
"ldr q3, [x4]",
|
||||
"ldr q4, [x4, #16]",
|
||||
"ldp q3, q4, [x4]",
|
||||
"zip1 v16.2d, v17.2d, v3.2d",
|
||||
"zip1 v2.2d, v2.2d, v4.2d",
|
||||
"str q2, [x28, #16]"
|
||||
@ -392,14 +384,13 @@
|
||||
]
|
||||
},
|
||||
"vunpckhps ymm0, ymm1, [rax]": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Comment": [
|
||||
"Map 1 0b00 0x15 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #32]",
|
||||
"ldr q3, [x4]",
|
||||
"ldr q4, [x4, #16]",
|
||||
"ldp q3, q4, [x4]",
|
||||
"zip2 v16.4s, v17.4s, v3.4s",
|
||||
"zip2 v2.4s, v2.4s, v4.4s",
|
||||
"str q2, [x28, #16]"
|
||||
@ -418,14 +409,13 @@
|
||||
]
|
||||
},
|
||||
"vunpckhpd ymm0, ymm1, [rax]": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Comment": [
|
||||
"Map 1 0b01 0x15 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #32]",
|
||||
"ldr q3, [x4]",
|
||||
"ldr q4, [x4, #16]",
|
||||
"ldp q3, q4, [x4]",
|
||||
"zip2 v16.2d, v17.2d, v3.2d",
|
||||
"zip2 v2.2d, v2.2d, v4.2d",
|
||||
"str q2, [x28, #16]"
|
||||
@ -479,13 +469,12 @@
|
||||
]
|
||||
},
|
||||
"vmovshdup ymm0, [rax]": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Comment": [
|
||||
"Map 1 0b10 0x16 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x4]",
|
||||
"ldr q3, [x4, #16]",
|
||||
"ldp q2, q3, [x4]",
|
||||
"trn2 v16.4s, v2.4s, v2.4s",
|
||||
"trn2 v2.4s, v3.4s, v3.4s",
|
||||
"str q2, [x28, #16]"
|
||||
@ -1634,33 +1623,25 @@
|
||||
]
|
||||
},
|
||||
"vzeroupper": {
|
||||
"ExpectedInstructionCount": 17,
|
||||
"ExpectedInstructionCount": 9,
|
||||
"Comment": [
|
||||
"Might be able to use DC ZVA",
|
||||
"Map 1 0b01 0x77 L=0"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #256]",
|
||||
"str q2, [x28, #240]",
|
||||
"str q2, [x28, #224]",
|
||||
"str q2, [x28, #208]",
|
||||
"str q2, [x28, #192]",
|
||||
"str q2, [x28, #176]",
|
||||
"str q2, [x28, #160]",
|
||||
"str q2, [x28, #144]",
|
||||
"str q2, [x28, #128]",
|
||||
"str q2, [x28, #112]",
|
||||
"str q2, [x28, #96]",
|
||||
"str q2, [x28, #80]",
|
||||
"str q2, [x28, #64]",
|
||||
"str q2, [x28, #48]",
|
||||
"str q2, [x28, #32]",
|
||||
"str q2, [x28, #16]"
|
||||
"stp q2, q2, [x28, #240]",
|
||||
"stp q2, q2, [x28, #208]",
|
||||
"stp q2, q2, [x28, #176]",
|
||||
"stp q2, q2, [x28, #144]",
|
||||
"stp q2, q2, [x28, #112]",
|
||||
"stp q2, q2, [x28, #80]",
|
||||
"stp q2, q2, [x28, #48]",
|
||||
"stp q2, q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vzeroall": {
|
||||
"ExpectedInstructionCount": 32,
|
||||
"ExpectedInstructionCount": 24,
|
||||
"Comment": [
|
||||
"Might be able to use DC ZVA",
|
||||
"Map 1 0b01 0x77 L=1"
|
||||
@ -1682,22 +1663,14 @@
|
||||
"movi v29.2d, #0x0",
|
||||
"movi v30.2d, #0x0",
|
||||
"movi v31.2d, #0x0",
|
||||
"str q31, [x28, #256]",
|
||||
"str q31, [x28, #240]",
|
||||
"str q31, [x28, #224]",
|
||||
"str q31, [x28, #208]",
|
||||
"str q31, [x28, #192]",
|
||||
"str q31, [x28, #176]",
|
||||
"str q31, [x28, #160]",
|
||||
"str q31, [x28, #144]",
|
||||
"str q31, [x28, #128]",
|
||||
"str q31, [x28, #112]",
|
||||
"str q31, [x28, #96]",
|
||||
"str q31, [x28, #80]",
|
||||
"str q31, [x28, #64]",
|
||||
"str q31, [x28, #48]",
|
||||
"str q31, [x28, #32]",
|
||||
"str q31, [x28, #16]"
|
||||
"stp q31, q31, [x28, #240]",
|
||||
"stp q31, q31, [x28, #208]",
|
||||
"stp q31, q31, [x28, #176]",
|
||||
"stp q31, q31, [x28, #144]",
|
||||
"stp q31, q31, [x28, #112]",
|
||||
"stp q31, q31, [x28, #80]",
|
||||
"stp q31, q31, [x28, #48]",
|
||||
"stp q31, q31, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vcmpps xmm0, xmm1, xmm2, 0x00": {
|
||||
@ -2631,13 +2604,12 @@
|
||||
]
|
||||
},
|
||||
"vmovaps ymm0, [rax]": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Comment": [
|
||||
"Map 1 0b00 0x28 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q16, [x4]",
|
||||
"ldr q2, [x4, #16]",
|
||||
"ldp q16, q2, [x4]",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
@ -2675,13 +2647,12 @@
|
||||
]
|
||||
},
|
||||
"vmovapd ymm0, [rax]": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Comment": [
|
||||
"Map 1 0b01 0x28 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q16, [x4]",
|
||||
"ldr q2, [x4, #16]",
|
||||
"ldp q16, q2, [x4]",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
@ -2717,14 +2688,13 @@
|
||||
]
|
||||
},
|
||||
"vmovaps [rax], ymm0": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Comment": [
|
||||
"Map 1 0b00 0x29 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #16]",
|
||||
"str q16, [x4]",
|
||||
"str q2, [x4, #16]"
|
||||
"stp q16, q2, [x4]"
|
||||
]
|
||||
},
|
||||
"vmovapd [rax], xmm0": {
|
||||
@ -2737,14 +2707,13 @@
|
||||
]
|
||||
},
|
||||
"vmovapd [rax], ymm0": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Comment": [
|
||||
"Map 1 0b01 0x29 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #16]",
|
||||
"str q16, [x4]",
|
||||
"str q2, [x4, #16]"
|
||||
"stp q16, q2, [x4]"
|
||||
]
|
||||
},
|
||||
"vcvtsi2ss xmm0, xmm1, eax": {
|
||||
@ -3161,13 +3130,12 @@
|
||||
]
|
||||
},
|
||||
"vcvtpd2ps xmm0, yword [rax]": {
|
||||
"ExpectedInstructionCount": 8,
|
||||
"ExpectedInstructionCount": 7,
|
||||
"Comment": [
|
||||
"Map 1 0b01 0x5a 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x4]",
|
||||
"ldr q3, [x4, #16]",
|
||||
"ldp q2, q3, [x4]",
|
||||
"fcvtn v2.2s, v2.2d",
|
||||
"fcvtn v3.2s, v3.2d",
|
||||
"mov v16.16b, v2.16b",
|
||||
@ -4005,47 +3973,43 @@
|
||||
]
|
||||
},
|
||||
"vmovdqa ymm0, [rax]": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Comment": [
|
||||
"Map 1 0b01 0x7f 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q16, [x4]",
|
||||
"ldr q2, [x4, #16]",
|
||||
"ldp q16, q2, [x4]",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vmovdqa [rax], ymm0": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Comment": [
|
||||
"Map 1 0b01 0x7f 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #16]",
|
||||
"str q16, [x4]",
|
||||
"str q2, [x4, #16]"
|
||||
"stp q16, q2, [x4]"
|
||||
]
|
||||
},
|
||||
"vmovdqu ymm0, [rax]": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Comment": [
|
||||
"Map 1 0b10 0x7f 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q16, [x4]",
|
||||
"ldr q2, [x4, #16]",
|
||||
"ldp q16, q2, [x4]",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vmovdqu [rax], ymm0": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Comment": [
|
||||
"Map 1 0b10 0x7f 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #16]",
|
||||
"str q16, [x4]",
|
||||
"str q2, [x4, #16]"
|
||||
"stp q16, q2, [x4]"
|
||||
]
|
||||
},
|
||||
"vaddsubpd xmm0, xmm1, xmm2": {
|
||||
@ -5178,13 +5142,12 @@
|
||||
]
|
||||
},
|
||||
"vlddqu ymm0, [rax]": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Comment": [
|
||||
"Map 1 0b11 0xf0 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q16, [x4]",
|
||||
"ldr q2, [x4, #16]",
|
||||
"ldp q16, q2, [x4]",
|
||||
"str q2, [x28, #16]"
|
||||
]
|
||||
},
|
||||
|
@ -1052,6 +1052,86 @@
|
||||
"ldr x20, [x28, #960]",
|
||||
"ldur x7, [x20, #20]"
|
||||
]
|
||||
},
|
||||
"vmovdqu ymm7,yword [rsi+0x60]": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"ExpectedArm64ASM": [
|
||||
"ldp q23, q2, [x10, #96]",
|
||||
"str q2, [x28, #128]"
|
||||
]
|
||||
},
|
||||
"vmovdqu ymm7,yword [rsi+0x120]": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"ExpectedArm64ASM": [
|
||||
"ldp q23, q2, [x10, #288]",
|
||||
"str q2, [x28, #128]"
|
||||
]
|
||||
},
|
||||
"vmovdqu ymm7,yword [rsi-0x60]": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"ExpectedArm64ASM": [
|
||||
"ldp q23, q2, [x10, #-96]",
|
||||
"str q2, [x28, #128]"
|
||||
]
|
||||
},
|
||||
"vmovdqu ymm7,yword [rsi-0x400]": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"ExpectedArm64ASM": [
|
||||
"ldp q23, q2, [x10, #-1024]",
|
||||
"str q2, [x28, #128]"
|
||||
]
|
||||
},
|
||||
"vmovdqu ymm7,yword [rsi-0x420]": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedArm64ASM": [
|
||||
"sub x20, x10, #0x420 (1056)",
|
||||
"ldp q23, q2, [x20]",
|
||||
"str q2, [x28, #128]"
|
||||
]
|
||||
},
|
||||
"vmovdqu ymm7,yword [rsi+0x3d0]": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"ExpectedArm64ASM": [
|
||||
"ldp q23, q2, [x10, #976]",
|
||||
"str q2, [x28, #128]"
|
||||
]
|
||||
},
|
||||
"vmovdqu ymm7,yword [rsi+0x400]": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedArm64ASM": [
|
||||
"add x20, x10, #0x400 (1024)",
|
||||
"ldp q23, q2, [x20]",
|
||||
"str q2, [x28, #128]"
|
||||
]
|
||||
},
|
||||
"vmovdqa yword [rcx+0x60],ymm1": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #32]",
|
||||
"stp q17, q2, [x5, #96]"
|
||||
]
|
||||
},
|
||||
"vmovdqa yword [rcx+0x3d0],ymm1": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #32]",
|
||||
"stp q17, q2, [x5, #976]"
|
||||
]
|
||||
},
|
||||
"vmovdqa yword [rcx-0x3d0],ymm1": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #32]",
|
||||
"stp q17, q2, [x5, #-976]"
|
||||
]
|
||||
},
|
||||
"vmovdqa yword [rcx+rsi-0x3d0],ymm1": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedArm64ASM": [
|
||||
"ldr q2, [x28, #32]",
|
||||
"add x20, x5, x10",
|
||||
"stp q17, q2, [x20, #-976]"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -428,6 +428,161 @@
|
||||
"mov x11, x26"
|
||||
]
|
||||
},
|
||||
"glibc AVX memcpy block 1": {
|
||||
"ExpectedInstructionCount": 26,
|
||||
"x86Insts": [
|
||||
"vmovdqu ymm5,yword [rsi+0x20]",
|
||||
"vmovdqu ymm6,yword [rsi+0x40]",
|
||||
"lea rcx,[rdi+rdx*1-0x81]",
|
||||
"vmovdqu ymm7,yword [rsi+0x60]",
|
||||
"vmovdqu ymm8,yword [rsi+rdx*1-0x20]",
|
||||
"sub rsi,rdi",
|
||||
"and rcx,0xffffffffffffffe0",
|
||||
"add rsi,rcx",
|
||||
"nop dword [rax+0x0]",
|
||||
"vmovdqu ymm1,yword [rsi+0x60]",
|
||||
"vmovdqu ymm2,yword [rsi+0x40]",
|
||||
"vmovdqu ymm3,yword [rsi+0x20]",
|
||||
"vmovdqu ymm4,yword [rsi]",
|
||||
"add rsi,0xffffffffffffff80",
|
||||
"vmovdqa yword [rcx+0x60],ymm1",
|
||||
"vmovdqa yword [rcx+0x40],ymm2",
|
||||
"vmovdqa yword [rcx+0x20],ymm3",
|
||||
"vmovdqa yword [rcx],ymm4",
|
||||
"add rcx,0xffffffffffffff80",
|
||||
"cmp rdi,rcx"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"ldp q21, q2, [x10, #32]",
|
||||
"ldp q22, q3, [x10, #64]",
|
||||
"sub x20, x11, #0x81 (129)",
|
||||
"add x5, x20, x6",
|
||||
"ldp q23, q4, [x10, #96]",
|
||||
"add x20, x10, x6",
|
||||
"ldp q24, q5, [x20, #-32]",
|
||||
"sub x10, x10, x11",
|
||||
"and x5, x5, #0xffffffffffffffe0",
|
||||
"add x10, x10, x5",
|
||||
"ldp q17, q6, [x10, #96]",
|
||||
"ldp q18, q7, [x10, #64]",
|
||||
"ldp q19, q8, [x10, #32]",
|
||||
"ldp q20, q9, [x10]",
|
||||
"sub x10, x10, #0x80 (128)",
|
||||
"stp q17, q6, [x5, #96]",
|
||||
"stp q18, q7, [x5, #64]",
|
||||
"stp q19, q8, [x5, #32]",
|
||||
"stp q20, q9, [x5]",
|
||||
"sub x5, x5, #0x80 (128)",
|
||||
"eor w27, w11, w5",
|
||||
"subs x26, x11, x5",
|
||||
"stp q4, q5, [x28, #128]",
|
||||
"stp q2, q3, [x28, #96]",
|
||||
"stp q8, q9, [x28, #64]",
|
||||
"stp q6, q7, [x28, #32]"
|
||||
]
|
||||
},
|
||||
"glibc AVX memcpy block 2": {
|
||||
"ExpectedInstructionCount": 31,
|
||||
"x86Insts": [
|
||||
"vmovdqu ymm5,yword [rsi+rdx*1-0x20]",
|
||||
"vmovdqu ymm6,yword [rsi+rdx*1-0x40]",
|
||||
"mov rcx,rdi",
|
||||
"or rdi,0x1f",
|
||||
"vmovdqu ymm7,yword [rsi+rdx*1-0x60]",
|
||||
"vmovdqu ymm8,yword [rsi+rdx*1-0x80]",
|
||||
"sub rsi,rcx",
|
||||
"inc rdi",
|
||||
"add rsi,rdi",
|
||||
"lea rdx,[rcx+rdx*1-0x80]",
|
||||
"nop dword [rax+rax*1+0x0]",
|
||||
"vmovdqu ymm1,yword [rsi]",
|
||||
"vmovdqu ymm2,yword [rsi+0x20]",
|
||||
"vmovdqu ymm3,yword [rsi+0x40]",
|
||||
"vmovdqu ymm4,yword [rsi+0x60]",
|
||||
"sub rsi,0xffffffffffffff80",
|
||||
"vmovdqa yword [rdi],ymm1",
|
||||
"vmovdqa yword [rdi+0x20],ymm2",
|
||||
"vmovdqa yword [rdi+0x40],ymm3",
|
||||
"vmovdqa yword [rdi+0x60],ymm4",
|
||||
"sub rdi,0xffffffffffffff80",
|
||||
"cmp rdx,rdi"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"add x20, x10, x6",
|
||||
"ldp q21, q2, [x20, #-32]",
|
||||
"add x20, x10, x6",
|
||||
"ldp q22, q3, [x20, #-64]",
|
||||
"mov x5, x11",
|
||||
"orr x11, x11, #0x1f",
|
||||
"add x20, x10, x6",
|
||||
"ldp q23, q4, [x20, #-96]",
|
||||
"add x20, x10, x6",
|
||||
"ldp q24, q5, [x20, #-128]",
|
||||
"sub x10, x10, x5",
|
||||
"add x11, x11, #0x1 (1)",
|
||||
"add x10, x10, x11",
|
||||
"sub x20, x5, #0x80 (128)",
|
||||
"add x6, x20, x6",
|
||||
"ldp q17, q6, [x10]",
|
||||
"ldp q18, q7, [x10, #32]",
|
||||
"ldp q19, q8, [x10, #64]",
|
||||
"ldp q20, q9, [x10, #96]",
|
||||
"add x10, x10, #0x80 (128)",
|
||||
"stp q17, q6, [x11]",
|
||||
"stp q18, q7, [x11, #32]",
|
||||
"stp q19, q8, [x11, #64]",
|
||||
"stp q20, q9, [x11, #96]",
|
||||
"add x11, x11, #0x80 (128)",
|
||||
"eor w27, w6, w11",
|
||||
"subs x26, x6, x11",
|
||||
"stp q4, q5, [x28, #128]",
|
||||
"stp q2, q3, [x28, #96]",
|
||||
"stp q8, q9, [x28, #64]",
|
||||
"stp q6, q7, [x28, #32]"
|
||||
]
|
||||
},
|
||||
"bytemark strsift": {
|
||||
"ExpectedInstructionCount": 20,
|
||||
"x86Insts": [
|
||||
"mov rsi,rdx",
|
||||
"and rsi,0xfffffffffffffffc",
|
||||
"movq xmm0,rcx",
|
||||
"pshufd xmm0,xmm0,0x44",
|
||||
"mov rdi,qword [rsp+0x20]",
|
||||
"lea rdi,[rdi+r13*8]",
|
||||
"xor r8d,r8d",
|
||||
"movdqu xmm1,oword [rdi+r8*8-0x10]",
|
||||
"movdqu xmm2,oword [rdi+r8*8]",
|
||||
"paddq xmm1,xmm0",
|
||||
"paddq xmm2,xmm0",
|
||||
"movdqu oword [rdi+r8*8-0x10],xmm1",
|
||||
"movdqu oword [rdi+r8*8],xmm2",
|
||||
"add r8,0x4",
|
||||
"cmp rsi,r8"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov x10, x6",
|
||||
"and x10, x10, #0xfffffffffffffffc",
|
||||
"fmov d16, x5",
|
||||
"dup v16.2d, v16.d[0]",
|
||||
"ldr x11, [x8, #32]",
|
||||
"add x11, x11, x17, lsl #3",
|
||||
"mov w12, #0x0",
|
||||
"add x20, x11, x12, lsl #3",
|
||||
"ldur q17, [x20, #-16]",
|
||||
"add x20, x11, x12, lsl #3",
|
||||
"ldr q18, [x20]",
|
||||
"add v17.2d, v17.2d, v16.2d",
|
||||
"add v18.2d, v18.2d, v16.2d",
|
||||
"add x20, x11, x12, lsl #3",
|
||||
"stur q17, [x20, #-16]",
|
||||
"add x20, x11, x12, lsl #3",
|
||||
"str q18, [x20]",
|
||||
"add x12, x12, #0x4 (4)",
|
||||
"eor w27, w10, w12",
|
||||
"subs x26, x10, x12"
|
||||
]
|
||||
},
|
||||
"pcmpistri xmm0, xmm1, 0_0_00_11_01b": {
|
||||
"ExpectedInstructionCount": 41,
|
||||
"Comment": [
|
||||
|
@ -1216,7 +1216,7 @@
|
||||
]
|
||||
},
|
||||
"fxsave [rax]": {
|
||||
"ExpectedInstructionCount": 52,
|
||||
"ExpectedInstructionCount": 39,
|
||||
"Comment": "GROUP15 0x0F 0xAE /0",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrh w20, [x28, #1296]",
|
||||
@ -1235,42 +1235,29 @@
|
||||
"ldrb w20, [x28, #1298]",
|
||||
"strb w20, [x4, #4]",
|
||||
"ldr q2, [x28, #1040]",
|
||||
"str q2, [x4, #32]",
|
||||
"ldr q2, [x28, #1056]",
|
||||
"str q2, [x4, #48]",
|
||||
"ldr q3, [x28, #1056]",
|
||||
"stp q2, q3, [x4, #32]",
|
||||
"ldr q2, [x28, #1072]",
|
||||
"str q2, [x4, #64]",
|
||||
"ldr q2, [x28, #1088]",
|
||||
"str q2, [x4, #80]",
|
||||
"ldr q3, [x28, #1088]",
|
||||
"stp q2, q3, [x4, #64]",
|
||||
"ldr q2, [x28, #1104]",
|
||||
"str q2, [x4, #96]",
|
||||
"ldr q2, [x28, #1120]",
|
||||
"str q2, [x4, #112]",
|
||||
"ldr q3, [x28, #1120]",
|
||||
"stp q2, q3, [x4, #96]",
|
||||
"ldr q2, [x28, #1136]",
|
||||
"str q2, [x4, #128]",
|
||||
"ldr q2, [x28, #1152]",
|
||||
"str q2, [x4, #144]",
|
||||
"str q16, [x4, #160]",
|
||||
"str q17, [x4, #176]",
|
||||
"str q18, [x4, #192]",
|
||||
"str q19, [x4, #208]",
|
||||
"str q20, [x4, #224]",
|
||||
"str q21, [x4, #240]",
|
||||
"str q22, [x4, #256]",
|
||||
"str q23, [x4, #272]",
|
||||
"str q24, [x4, #288]",
|
||||
"str q25, [x4, #304]",
|
||||
"str q26, [x4, #320]",
|
||||
"str q27, [x4, #336]",
|
||||
"str q28, [x4, #352]",
|
||||
"str q29, [x4, #368]",
|
||||
"str q30, [x4, #384]",
|
||||
"str q31, [x4, #400]",
|
||||
"ldr q3, [x28, #1152]",
|
||||
"stp q2, q3, [x4, #128]",
|
||||
"stp q16, q17, [x4, #160]",
|
||||
"stp q18, q19, [x4, #192]",
|
||||
"stp q20, q21, [x4, #224]",
|
||||
"stp q22, q23, [x4, #256]",
|
||||
"stp q24, q25, [x4, #288]",
|
||||
"stp q26, q27, [x4, #320]",
|
||||
"stp q28, q29, [x4, #352]",
|
||||
"stp q30, q31, [x4, #384]",
|
||||
"ldr w20, [x28, #940]",
|
||||
"and w20, w20, #0xffc0",
|
||||
"str w20, [x4, #24]",
|
||||
"mov w20, #0xffff",
|
||||
"str w20, [x4, #28]"
|
||||
"mov w21, #0xffff",
|
||||
"stp w20, w21, [x4, #24]"
|
||||
]
|
||||
},
|
||||
"rdfsbase eax": {
|
||||
@ -1288,7 +1275,7 @@
|
||||
]
|
||||
},
|
||||
"fxrstor [rax]": {
|
||||
"ExpectedInstructionCount": 58,
|
||||
"ExpectedInstructionCount": 46,
|
||||
"Comment": "GROUP15 0x0F 0xAE /1",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrh w20, [x4]",
|
||||
@ -1305,30 +1292,18 @@
|
||||
"strb w23, [x28, #1018]",
|
||||
"strb w20, [x28, #1022]",
|
||||
"ldrb w20, [x4, #4]",
|
||||
"ldr q2, [x4, #32]",
|
||||
"ldr q3, [x4, #48]",
|
||||
"ldr q4, [x4, #64]",
|
||||
"ldr q5, [x4, #80]",
|
||||
"ldr q6, [x4, #96]",
|
||||
"ldr q7, [x4, #112]",
|
||||
"ldr q8, [x4, #128]",
|
||||
"ldr q9, [x4, #144]",
|
||||
"ldr q16, [x4, #160]",
|
||||
"ldr q17, [x4, #176]",
|
||||
"ldr q18, [x4, #192]",
|
||||
"ldr q19, [x4, #208]",
|
||||
"ldr q20, [x4, #224]",
|
||||
"ldr q21, [x4, #240]",
|
||||
"ldr q22, [x4, #256]",
|
||||
"ldr q23, [x4, #272]",
|
||||
"ldr q24, [x4, #288]",
|
||||
"ldr q25, [x4, #304]",
|
||||
"ldr q26, [x4, #320]",
|
||||
"ldr q27, [x4, #336]",
|
||||
"ldr q28, [x4, #352]",
|
||||
"ldr q29, [x4, #368]",
|
||||
"ldr q30, [x4, #384]",
|
||||
"ldr q31, [x4, #400]",
|
||||
"ldp q2, q3, [x4, #32]",
|
||||
"ldp q4, q5, [x4, #64]",
|
||||
"ldp q6, q7, [x4, #96]",
|
||||
"ldp q8, q9, [x4, #128]",
|
||||
"ldp q16, q17, [x4, #160]",
|
||||
"ldp q18, q19, [x4, #192]",
|
||||
"ldp q20, q21, [x4, #224]",
|
||||
"ldp q22, q23, [x4, #256]",
|
||||
"ldp q24, q25, [x4, #288]",
|
||||
"ldp q26, q27, [x4, #320]",
|
||||
"ldp q28, q29, [x4, #352]",
|
||||
"ldp q30, q31, [x4, #384]",
|
||||
"ldr w21, [x4, #24]",
|
||||
"and w21, w21, #0xffc0",
|
||||
"str w21, [x28, #940]",
|
||||
@ -1422,12 +1397,12 @@
|
||||
]
|
||||
},
|
||||
"xsave [rax]": {
|
||||
"ExpectedInstructionCount": 98,
|
||||
"ExpectedInstructionCount": 69,
|
||||
"Comment": "GROUP15 0x0F 0xAE /4",
|
||||
"ExpectedArm64ASM": [
|
||||
"ubfx x20, x4, #0, #1",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x80",
|
||||
"b #+0x70",
|
||||
"ldrh w20, [x28, #1296]",
|
||||
"strh w20, [x4]",
|
||||
"ldrb w20, [x28, #1019]",
|
||||
@ -1444,83 +1419,54 @@
|
||||
"ldrb w20, [x28, #1298]",
|
||||
"strb w20, [x4, #4]",
|
||||
"ldr q2, [x28, #1040]",
|
||||
"str q2, [x4, #32]",
|
||||
"ldr q2, [x28, #1056]",
|
||||
"str q2, [x4, #48]",
|
||||
"ldr q3, [x28, #1056]",
|
||||
"stp q2, q3, [x4, #32]",
|
||||
"ldr q2, [x28, #1072]",
|
||||
"str q2, [x4, #64]",
|
||||
"ldr q2, [x28, #1088]",
|
||||
"str q2, [x4, #80]",
|
||||
"ldr q3, [x28, #1088]",
|
||||
"stp q2, q3, [x4, #64]",
|
||||
"ldr q2, [x28, #1104]",
|
||||
"str q2, [x4, #96]",
|
||||
"ldr q2, [x28, #1120]",
|
||||
"str q2, [x4, #112]",
|
||||
"ldr q3, [x28, #1120]",
|
||||
"stp q2, q3, [x4, #96]",
|
||||
"ldr q2, [x28, #1136]",
|
||||
"str q2, [x4, #128]",
|
||||
"ldr q2, [x28, #1152]",
|
||||
"str q2, [x4, #144]",
|
||||
"ldr q3, [x28, #1152]",
|
||||
"stp q2, q3, [x4, #128]",
|
||||
"ubfx x20, x4, #1, #1",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x44",
|
||||
"str q16, [x4, #160]",
|
||||
"str q17, [x4, #176]",
|
||||
"str q18, [x4, #192]",
|
||||
"str q19, [x4, #208]",
|
||||
"str q20, [x4, #224]",
|
||||
"str q21, [x4, #240]",
|
||||
"str q22, [x4, #256]",
|
||||
"str q23, [x4, #272]",
|
||||
"str q24, [x4, #288]",
|
||||
"str q25, [x4, #304]",
|
||||
"str q26, [x4, #320]",
|
||||
"str q27, [x4, #336]",
|
||||
"str q28, [x4, #352]",
|
||||
"str q29, [x4, #368]",
|
||||
"str q30, [x4, #384]",
|
||||
"str q31, [x4, #400]",
|
||||
"b #+0x24",
|
||||
"stp q16, q17, [x4, #160]",
|
||||
"stp q18, q19, [x4, #192]",
|
||||
"stp q20, q21, [x4, #224]",
|
||||
"stp q22, q23, [x4, #256]",
|
||||
"stp q24, q25, [x4, #288]",
|
||||
"stp q26, q27, [x4, #320]",
|
||||
"stp q28, q29, [x4, #352]",
|
||||
"stp q30, q31, [x4, #384]",
|
||||
"ubfx x20, x4, #2, #1",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x84",
|
||||
"ldr q2, [x28, #16]",
|
||||
"str q2, [x4, #576]",
|
||||
"ldr q2, [x28, #32]",
|
||||
"str q2, [x4, #592]",
|
||||
"ldr q2, [x28, #48]",
|
||||
"str q2, [x4, #608]",
|
||||
"ldr q2, [x28, #64]",
|
||||
"str q2, [x4, #624]",
|
||||
"ldr q2, [x28, #80]",
|
||||
"str q2, [x4, #640]",
|
||||
"ldr q2, [x28, #96]",
|
||||
"str q2, [x4, #656]",
|
||||
"ldr q2, [x28, #112]",
|
||||
"str q2, [x4, #672]",
|
||||
"ldr q2, [x28, #128]",
|
||||
"str q2, [x4, #688]",
|
||||
"ldr q2, [x28, #144]",
|
||||
"str q2, [x4, #704]",
|
||||
"ldr q2, [x28, #160]",
|
||||
"str q2, [x4, #720]",
|
||||
"ldr q2, [x28, #176]",
|
||||
"str q2, [x4, #736]",
|
||||
"ldr q2, [x28, #192]",
|
||||
"str q2, [x4, #752]",
|
||||
"ldr q2, [x28, #208]",
|
||||
"str q2, [x4, #768]",
|
||||
"ldr q2, [x28, #224]",
|
||||
"str q2, [x4, #784]",
|
||||
"ldr q2, [x28, #240]",
|
||||
"str q2, [x4, #800]",
|
||||
"ldr q2, [x28, #256]",
|
||||
"str q2, [x4, #816]",
|
||||
"b #+0x44",
|
||||
"ldp q2, q3, [x28, #16]",
|
||||
"stp q2, q3, [x4, #576]",
|
||||
"ldp q2, q3, [x28, #48]",
|
||||
"stp q2, q3, [x4, #608]",
|
||||
"ldp q2, q3, [x28, #80]",
|
||||
"stp q2, q3, [x4, #640]",
|
||||
"ldp q2, q3, [x28, #112]",
|
||||
"stp q2, q3, [x4, #672]",
|
||||
"ldp q2, q3, [x28, #144]",
|
||||
"stp q2, q3, [x4, #704]",
|
||||
"ldp q2, q3, [x28, #176]",
|
||||
"stp q2, q3, [x4, #736]",
|
||||
"ldp q2, q3, [x28, #208]",
|
||||
"stp q2, q3, [x4, #768]",
|
||||
"ldp q2, q3, [x28, #240]",
|
||||
"stp q2, q3, [x4, #800]",
|
||||
"ubfx x20, x4, #1, #2",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x18",
|
||||
"b #+0x14",
|
||||
"ldr w20, [x28, #940]",
|
||||
"and w20, w20, #0xffc0",
|
||||
"str w20, [x4, #24]",
|
||||
"mov w20, #0xffff",
|
||||
"str w20, [x4, #28]",
|
||||
"mov w21, #0xffff",
|
||||
"stp w20, w21, [x4, #24]",
|
||||
"ubfx x20, x4, #0, #3",
|
||||
"str x20, [x4, #512]"
|
||||
]
|
||||
@ -1533,14 +1479,14 @@
|
||||
]
|
||||
},
|
||||
"xrstor [rax]": {
|
||||
"ExpectedInstructionCount": 166,
|
||||
"ExpectedInstructionCount": 130,
|
||||
"Comment": "GROUP15 0x0F 0xAE /5",
|
||||
"ExpectedArm64ASM": [
|
||||
"sub sp, sp, #0x40 (64)",
|
||||
"ldr x20, [x4, #512]",
|
||||
"ubfx x20, x20, #0, #1",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x84",
|
||||
"b #+0x74",
|
||||
"ldrh w20, [x4]",
|
||||
"strh w20, [x28, #1296]",
|
||||
"ldrh w20, [x4, #2]",
|
||||
@ -1555,14 +1501,10 @@
|
||||
"strb w23, [x28, #1018]",
|
||||
"strb w20, [x28, #1022]",
|
||||
"ldrb w20, [x4, #4]",
|
||||
"ldr q2, [x4, #32]",
|
||||
"ldr q3, [x4, #48]",
|
||||
"ldr q4, [x4, #64]",
|
||||
"ldr q5, [x4, #80]",
|
||||
"ldr q6, [x4, #96]",
|
||||
"ldr q7, [x4, #112]",
|
||||
"ldr q8, [x4, #128]",
|
||||
"ldr q9, [x4, #144]",
|
||||
"ldp q2, q3, [x4, #32]",
|
||||
"ldp q4, q5, [x4, #64]",
|
||||
"ldp q6, q7, [x4, #96]",
|
||||
"ldp q8, q9, [x4, #128]",
|
||||
"strb w20, [x28, #1298]",
|
||||
"str q9, [x28, #1152]",
|
||||
"str q8, [x28, #1136]",
|
||||
@ -1593,23 +1535,15 @@
|
||||
"ldr x20, [x4, #512]",
|
||||
"ubfx x20, x20, #1, #1",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x48",
|
||||
"ldr q16, [x4, #160]",
|
||||
"ldr q17, [x4, #176]",
|
||||
"ldr q18, [x4, #192]",
|
||||
"ldr q19, [x4, #208]",
|
||||
"ldr q20, [x4, #224]",
|
||||
"ldr q21, [x4, #240]",
|
||||
"ldr q22, [x4, #256]",
|
||||
"ldr q23, [x4, #272]",
|
||||
"ldr q24, [x4, #288]",
|
||||
"ldr q25, [x4, #304]",
|
||||
"ldr q26, [x4, #320]",
|
||||
"ldr q27, [x4, #336]",
|
||||
"ldr q28, [x4, #352]",
|
||||
"ldr q29, [x4, #368]",
|
||||
"ldr q30, [x4, #384]",
|
||||
"ldr q31, [x4, #400]",
|
||||
"b #+0x28",
|
||||
"ldp q16, q17, [x4, #160]",
|
||||
"ldp q18, q19, [x4, #192]",
|
||||
"ldp q20, q21, [x4, #224]",
|
||||
"ldp q22, q23, [x4, #256]",
|
||||
"ldp q24, q25, [x4, #288]",
|
||||
"ldp q26, q27, [x4, #320]",
|
||||
"ldp q28, q29, [x4, #352]",
|
||||
"ldp q30, q31, [x4, #384]",
|
||||
"b #+0x44",
|
||||
"movi v31.2d, #0x0",
|
||||
"mov v30.16b, v31.16b",
|
||||
@ -1630,61 +1564,37 @@
|
||||
"ldr x20, [x4, #512]",
|
||||
"ubfx x20, x20, #2, #1",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x98",
|
||||
"ldr q2, [x4, #576]",
|
||||
"ldr q3, [x4, #592]",
|
||||
"ldr q4, [x4, #608]",
|
||||
"ldr q5, [x4, #624]",
|
||||
"ldr q6, [x4, #640]",
|
||||
"ldr q7, [x4, #656]",
|
||||
"ldr q8, [x4, #672]",
|
||||
"ldr q9, [x4, #688]",
|
||||
"ldr q10, [x4, #704]",
|
||||
"ldr q11, [x4, #720]",
|
||||
"ldr q12, [x4, #736]",
|
||||
"ldr q13, [x4, #752]",
|
||||
"ldr q14, [x4, #768]",
|
||||
"ldr q15, [x4, #784]",
|
||||
"b #+0x58",
|
||||
"ldp q2, q3, [x4, #576]",
|
||||
"ldp q4, q5, [x4, #608]",
|
||||
"ldp q6, q7, [x4, #640]",
|
||||
"ldp q8, q9, [x4, #672]",
|
||||
"ldp q10, q11, [x4, #704]",
|
||||
"ldp q12, q13, [x4, #736]",
|
||||
"ldp q14, q15, [x4, #768]",
|
||||
"str q2, [sp]",
|
||||
"ldr q2, [x4, #800]",
|
||||
"str q3, [sp, #32]",
|
||||
"ldr q3, [x4, #816]",
|
||||
"str q3, [x28, #256]",
|
||||
"str q2, [x28, #240]",
|
||||
"str q15, [x28, #224]",
|
||||
"str q14, [x28, #208]",
|
||||
"str q13, [x28, #192]",
|
||||
"str q12, [x28, #176]",
|
||||
"str q11, [x28, #160]",
|
||||
"str q10, [x28, #144]",
|
||||
"str q9, [x28, #128]",
|
||||
"str q8, [x28, #112]",
|
||||
"str q7, [x28, #96]",
|
||||
"str q6, [x28, #80]",
|
||||
"str q5, [x28, #64]",
|
||||
"str q4, [x28, #48]",
|
||||
"ldr q2, [sp, #32]",
|
||||
"str q2, [x28, #32]",
|
||||
"ldp q2, q3, [x4, #800]",
|
||||
"stp q2, q3, [x28, #240]",
|
||||
"stp q14, q15, [x28, #208]",
|
||||
"stp q12, q13, [x28, #176]",
|
||||
"stp q10, q11, [x28, #144]",
|
||||
"stp q8, q9, [x28, #112]",
|
||||
"stp q6, q7, [x28, #80]",
|
||||
"stp q4, q5, [x28, #48]",
|
||||
"ldr q2, [sp]",
|
||||
"str q2, [x28, #16]",
|
||||
"b #+0x48",
|
||||
"ldr q3, [sp, #32]",
|
||||
"stp q2, q3, [x28, #16]",
|
||||
"b #+0x28",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #256]",
|
||||
"str q2, [x28, #240]",
|
||||
"str q2, [x28, #224]",
|
||||
"str q2, [x28, #208]",
|
||||
"str q2, [x28, #192]",
|
||||
"str q2, [x28, #176]",
|
||||
"str q2, [x28, #160]",
|
||||
"str q2, [x28, #144]",
|
||||
"str q2, [x28, #128]",
|
||||
"str q2, [x28, #112]",
|
||||
"str q2, [x28, #96]",
|
||||
"str q2, [x28, #80]",
|
||||
"str q2, [x28, #64]",
|
||||
"str q2, [x28, #48]",
|
||||
"str q2, [x28, #32]",
|
||||
"str q2, [x28, #16]",
|
||||
"stp q2, q2, [x28, #240]",
|
||||
"stp q2, q2, [x28, #208]",
|
||||
"stp q2, q2, [x28, #176]",
|
||||
"stp q2, q2, [x28, #144]",
|
||||
"stp q2, q2, [x28, #112]",
|
||||
"stp q2, q2, [x28, #80]",
|
||||
"stp q2, q2, [x28, #48]",
|
||||
"stp q2, q2, [x28, #16]",
|
||||
"ldr x20, [x4, #512]",
|
||||
"ubfx x20, x20, #1, #2",
|
||||
"cbnz x20, #+0x8",
|
||||
|
@ -1406,7 +1406,7 @@
|
||||
]
|
||||
},
|
||||
"fxsave [rax]": {
|
||||
"ExpectedInstructionCount": 52,
|
||||
"ExpectedInstructionCount": 39,
|
||||
"Comment": "GROUP15 0x0F 0xAE /0",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrh w20, [x28, #1296]",
|
||||
@ -1425,42 +1425,29 @@
|
||||
"ldrb w20, [x28, #1298]",
|
||||
"strb w20, [x4, #4]",
|
||||
"ldr q2, [x28, #1040]",
|
||||
"str q2, [x4, #32]",
|
||||
"ldr q2, [x28, #1056]",
|
||||
"str q2, [x4, #48]",
|
||||
"ldr q3, [x28, #1056]",
|
||||
"stp q2, q3, [x4, #32]",
|
||||
"ldr q2, [x28, #1072]",
|
||||
"str q2, [x4, #64]",
|
||||
"ldr q2, [x28, #1088]",
|
||||
"str q2, [x4, #80]",
|
||||
"ldr q3, [x28, #1088]",
|
||||
"stp q2, q3, [x4, #64]",
|
||||
"ldr q2, [x28, #1104]",
|
||||
"str q2, [x4, #96]",
|
||||
"ldr q2, [x28, #1120]",
|
||||
"str q2, [x4, #112]",
|
||||
"ldr q3, [x28, #1120]",
|
||||
"stp q2, q3, [x4, #96]",
|
||||
"ldr q2, [x28, #1136]",
|
||||
"str q2, [x4, #128]",
|
||||
"ldr q2, [x28, #1152]",
|
||||
"str q2, [x4, #144]",
|
||||
"str q16, [x4, #160]",
|
||||
"str q17, [x4, #176]",
|
||||
"str q18, [x4, #192]",
|
||||
"str q19, [x4, #208]",
|
||||
"str q20, [x4, #224]",
|
||||
"str q21, [x4, #240]",
|
||||
"str q22, [x4, #256]",
|
||||
"str q23, [x4, #272]",
|
||||
"str q24, [x4, #288]",
|
||||
"str q25, [x4, #304]",
|
||||
"str q26, [x4, #320]",
|
||||
"str q27, [x4, #336]",
|
||||
"str q28, [x4, #352]",
|
||||
"str q29, [x4, #368]",
|
||||
"str q30, [x4, #384]",
|
||||
"str q31, [x4, #400]",
|
||||
"ldr q3, [x28, #1152]",
|
||||
"stp q2, q3, [x4, #128]",
|
||||
"stp q16, q17, [x4, #160]",
|
||||
"stp q18, q19, [x4, #192]",
|
||||
"stp q20, q21, [x4, #224]",
|
||||
"stp q22, q23, [x4, #256]",
|
||||
"stp q24, q25, [x4, #288]",
|
||||
"stp q26, q27, [x4, #320]",
|
||||
"stp q28, q29, [x4, #352]",
|
||||
"stp q30, q31, [x4, #384]",
|
||||
"ldr w20, [x28, #940]",
|
||||
"and w20, w20, #0xffc0",
|
||||
"str w20, [x4, #24]",
|
||||
"mov w20, #0xffff",
|
||||
"str w20, [x4, #28]"
|
||||
"mov w21, #0xffff",
|
||||
"stp w20, w21, [x4, #24]"
|
||||
]
|
||||
},
|
||||
"rdfsbase eax": {
|
||||
@ -1478,7 +1465,7 @@
|
||||
]
|
||||
},
|
||||
"fxrstor [rax]": {
|
||||
"ExpectedInstructionCount": 58,
|
||||
"ExpectedInstructionCount": 46,
|
||||
"Comment": "GROUP15 0x0F 0xAE /1",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrh w20, [x4]",
|
||||
@ -1495,30 +1482,18 @@
|
||||
"strb w23, [x28, #1018]",
|
||||
"strb w20, [x28, #1022]",
|
||||
"ldrb w20, [x4, #4]",
|
||||
"ldr q2, [x4, #32]",
|
||||
"ldr q3, [x4, #48]",
|
||||
"ldr q4, [x4, #64]",
|
||||
"ldr q5, [x4, #80]",
|
||||
"ldr q6, [x4, #96]",
|
||||
"ldr q7, [x4, #112]",
|
||||
"ldr q8, [x4, #128]",
|
||||
"ldr q9, [x4, #144]",
|
||||
"ldr q16, [x4, #160]",
|
||||
"ldr q17, [x4, #176]",
|
||||
"ldr q18, [x4, #192]",
|
||||
"ldr q19, [x4, #208]",
|
||||
"ldr q20, [x4, #224]",
|
||||
"ldr q21, [x4, #240]",
|
||||
"ldr q22, [x4, #256]",
|
||||
"ldr q23, [x4, #272]",
|
||||
"ldr q24, [x4, #288]",
|
||||
"ldr q25, [x4, #304]",
|
||||
"ldr q26, [x4, #320]",
|
||||
"ldr q27, [x4, #336]",
|
||||
"ldr q28, [x4, #352]",
|
||||
"ldr q29, [x4, #368]",
|
||||
"ldr q30, [x4, #384]",
|
||||
"ldr q31, [x4, #400]",
|
||||
"ldp q2, q3, [x4, #32]",
|
||||
"ldp q4, q5, [x4, #64]",
|
||||
"ldp q6, q7, [x4, #96]",
|
||||
"ldp q8, q9, [x4, #128]",
|
||||
"ldp q16, q17, [x4, #160]",
|
||||
"ldp q18, q19, [x4, #192]",
|
||||
"ldp q20, q21, [x4, #224]",
|
||||
"ldp q22, q23, [x4, #256]",
|
||||
"ldp q24, q25, [x4, #288]",
|
||||
"ldp q26, q27, [x4, #320]",
|
||||
"ldp q28, q29, [x4, #352]",
|
||||
"ldp q30, q31, [x4, #384]",
|
||||
"ldr w21, [x4, #24]",
|
||||
"and w21, w21, #0xffc0",
|
||||
"str w21, [x28, #940]",
|
||||
@ -1612,12 +1587,12 @@
|
||||
]
|
||||
},
|
||||
"xsave [rax]": {
|
||||
"ExpectedInstructionCount": 98,
|
||||
"ExpectedInstructionCount": 69,
|
||||
"Comment": "GROUP15 0x0F 0xAE /4",
|
||||
"ExpectedArm64ASM": [
|
||||
"ubfx x20, x4, #0, #1",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x80",
|
||||
"b #+0x70",
|
||||
"ldrh w20, [x28, #1296]",
|
||||
"strh w20, [x4]",
|
||||
"ldrb w20, [x28, #1019]",
|
||||
@ -1634,83 +1609,54 @@
|
||||
"ldrb w20, [x28, #1298]",
|
||||
"strb w20, [x4, #4]",
|
||||
"ldr q2, [x28, #1040]",
|
||||
"str q2, [x4, #32]",
|
||||
"ldr q2, [x28, #1056]",
|
||||
"str q2, [x4, #48]",
|
||||
"ldr q3, [x28, #1056]",
|
||||
"stp q2, q3, [x4, #32]",
|
||||
"ldr q2, [x28, #1072]",
|
||||
"str q2, [x4, #64]",
|
||||
"ldr q2, [x28, #1088]",
|
||||
"str q2, [x4, #80]",
|
||||
"ldr q3, [x28, #1088]",
|
||||
"stp q2, q3, [x4, #64]",
|
||||
"ldr q2, [x28, #1104]",
|
||||
"str q2, [x4, #96]",
|
||||
"ldr q2, [x28, #1120]",
|
||||
"str q2, [x4, #112]",
|
||||
"ldr q3, [x28, #1120]",
|
||||
"stp q2, q3, [x4, #96]",
|
||||
"ldr q2, [x28, #1136]",
|
||||
"str q2, [x4, #128]",
|
||||
"ldr q2, [x28, #1152]",
|
||||
"str q2, [x4, #144]",
|
||||
"ldr q3, [x28, #1152]",
|
||||
"stp q2, q3, [x4, #128]",
|
||||
"ubfx x20, x4, #1, #1",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x44",
|
||||
"str q16, [x4, #160]",
|
||||
"str q17, [x4, #176]",
|
||||
"str q18, [x4, #192]",
|
||||
"str q19, [x4, #208]",
|
||||
"str q20, [x4, #224]",
|
||||
"str q21, [x4, #240]",
|
||||
"str q22, [x4, #256]",
|
||||
"str q23, [x4, #272]",
|
||||
"str q24, [x4, #288]",
|
||||
"str q25, [x4, #304]",
|
||||
"str q26, [x4, #320]",
|
||||
"str q27, [x4, #336]",
|
||||
"str q28, [x4, #352]",
|
||||
"str q29, [x4, #368]",
|
||||
"str q30, [x4, #384]",
|
||||
"str q31, [x4, #400]",
|
||||
"b #+0x24",
|
||||
"stp q16, q17, [x4, #160]",
|
||||
"stp q18, q19, [x4, #192]",
|
||||
"stp q20, q21, [x4, #224]",
|
||||
"stp q22, q23, [x4, #256]",
|
||||
"stp q24, q25, [x4, #288]",
|
||||
"stp q26, q27, [x4, #320]",
|
||||
"stp q28, q29, [x4, #352]",
|
||||
"stp q30, q31, [x4, #384]",
|
||||
"ubfx x20, x4, #2, #1",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x84",
|
||||
"ldr q2, [x28, #16]",
|
||||
"str q2, [x4, #576]",
|
||||
"ldr q2, [x28, #32]",
|
||||
"str q2, [x4, #592]",
|
||||
"ldr q2, [x28, #48]",
|
||||
"str q2, [x4, #608]",
|
||||
"ldr q2, [x28, #64]",
|
||||
"str q2, [x4, #624]",
|
||||
"ldr q2, [x28, #80]",
|
||||
"str q2, [x4, #640]",
|
||||
"ldr q2, [x28, #96]",
|
||||
"str q2, [x4, #656]",
|
||||
"ldr q2, [x28, #112]",
|
||||
"str q2, [x4, #672]",
|
||||
"ldr q2, [x28, #128]",
|
||||
"str q2, [x4, #688]",
|
||||
"ldr q2, [x28, #144]",
|
||||
"str q2, [x4, #704]",
|
||||
"ldr q2, [x28, #160]",
|
||||
"str q2, [x4, #720]",
|
||||
"ldr q2, [x28, #176]",
|
||||
"str q2, [x4, #736]",
|
||||
"ldr q2, [x28, #192]",
|
||||
"str q2, [x4, #752]",
|
||||
"ldr q2, [x28, #208]",
|
||||
"str q2, [x4, #768]",
|
||||
"ldr q2, [x28, #224]",
|
||||
"str q2, [x4, #784]",
|
||||
"ldr q2, [x28, #240]",
|
||||
"str q2, [x4, #800]",
|
||||
"ldr q2, [x28, #256]",
|
||||
"str q2, [x4, #816]",
|
||||
"b #+0x44",
|
||||
"ldp q2, q3, [x28, #16]",
|
||||
"stp q2, q3, [x4, #576]",
|
||||
"ldp q2, q3, [x28, #48]",
|
||||
"stp q2, q3, [x4, #608]",
|
||||
"ldp q2, q3, [x28, #80]",
|
||||
"stp q2, q3, [x4, #640]",
|
||||
"ldp q2, q3, [x28, #112]",
|
||||
"stp q2, q3, [x4, #672]",
|
||||
"ldp q2, q3, [x28, #144]",
|
||||
"stp q2, q3, [x4, #704]",
|
||||
"ldp q2, q3, [x28, #176]",
|
||||
"stp q2, q3, [x4, #736]",
|
||||
"ldp q2, q3, [x28, #208]",
|
||||
"stp q2, q3, [x4, #768]",
|
||||
"ldp q2, q3, [x28, #240]",
|
||||
"stp q2, q3, [x4, #800]",
|
||||
"ubfx x20, x4, #1, #2",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x18",
|
||||
"b #+0x14",
|
||||
"ldr w20, [x28, #940]",
|
||||
"and w20, w20, #0xffc0",
|
||||
"str w20, [x4, #24]",
|
||||
"mov w20, #0xffff",
|
||||
"str w20, [x4, #28]",
|
||||
"mov w21, #0xffff",
|
||||
"stp w20, w21, [x4, #24]",
|
||||
"ubfx x20, x4, #0, #3",
|
||||
"str x20, [x4, #512]"
|
||||
]
|
||||
@ -1723,14 +1669,14 @@
|
||||
]
|
||||
},
|
||||
"xrstor [rax]": {
|
||||
"ExpectedInstructionCount": 166,
|
||||
"ExpectedInstructionCount": 130,
|
||||
"Comment": "GROUP15 0x0F 0xAE /5",
|
||||
"ExpectedArm64ASM": [
|
||||
"sub sp, sp, #0x40 (64)",
|
||||
"ldr x20, [x4, #512]",
|
||||
"ubfx x20, x20, #0, #1",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x84",
|
||||
"b #+0x74",
|
||||
"ldrh w20, [x4]",
|
||||
"strh w20, [x28, #1296]",
|
||||
"ldrh w20, [x4, #2]",
|
||||
@ -1745,14 +1691,10 @@
|
||||
"strb w23, [x28, #1018]",
|
||||
"strb w20, [x28, #1022]",
|
||||
"ldrb w20, [x4, #4]",
|
||||
"ldr q2, [x4, #32]",
|
||||
"ldr q3, [x4, #48]",
|
||||
"ldr q4, [x4, #64]",
|
||||
"ldr q5, [x4, #80]",
|
||||
"ldr q6, [x4, #96]",
|
||||
"ldr q7, [x4, #112]",
|
||||
"ldr q8, [x4, #128]",
|
||||
"ldr q9, [x4, #144]",
|
||||
"ldp q2, q3, [x4, #32]",
|
||||
"ldp q4, q5, [x4, #64]",
|
||||
"ldp q6, q7, [x4, #96]",
|
||||
"ldp q8, q9, [x4, #128]",
|
||||
"strb w20, [x28, #1298]",
|
||||
"str q9, [x28, #1152]",
|
||||
"str q8, [x28, #1136]",
|
||||
@ -1783,23 +1725,15 @@
|
||||
"ldr x20, [x4, #512]",
|
||||
"ubfx x20, x20, #1, #1",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x48",
|
||||
"ldr q16, [x4, #160]",
|
||||
"ldr q17, [x4, #176]",
|
||||
"ldr q18, [x4, #192]",
|
||||
"ldr q19, [x4, #208]",
|
||||
"ldr q20, [x4, #224]",
|
||||
"ldr q21, [x4, #240]",
|
||||
"ldr q22, [x4, #256]",
|
||||
"ldr q23, [x4, #272]",
|
||||
"ldr q24, [x4, #288]",
|
||||
"ldr q25, [x4, #304]",
|
||||
"ldr q26, [x4, #320]",
|
||||
"ldr q27, [x4, #336]",
|
||||
"ldr q28, [x4, #352]",
|
||||
"ldr q29, [x4, #368]",
|
||||
"ldr q30, [x4, #384]",
|
||||
"ldr q31, [x4, #400]",
|
||||
"b #+0x28",
|
||||
"ldp q16, q17, [x4, #160]",
|
||||
"ldp q18, q19, [x4, #192]",
|
||||
"ldp q20, q21, [x4, #224]",
|
||||
"ldp q22, q23, [x4, #256]",
|
||||
"ldp q24, q25, [x4, #288]",
|
||||
"ldp q26, q27, [x4, #320]",
|
||||
"ldp q28, q29, [x4, #352]",
|
||||
"ldp q30, q31, [x4, #384]",
|
||||
"b #+0x44",
|
||||
"movi v31.2d, #0x0",
|
||||
"mov v30.16b, v31.16b",
|
||||
@ -1820,61 +1754,37 @@
|
||||
"ldr x20, [x4, #512]",
|
||||
"ubfx x20, x20, #2, #1",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x98",
|
||||
"ldr q2, [x4, #576]",
|
||||
"ldr q3, [x4, #592]",
|
||||
"ldr q4, [x4, #608]",
|
||||
"ldr q5, [x4, #624]",
|
||||
"ldr q6, [x4, #640]",
|
||||
"ldr q7, [x4, #656]",
|
||||
"ldr q8, [x4, #672]",
|
||||
"ldr q9, [x4, #688]",
|
||||
"ldr q10, [x4, #704]",
|
||||
"ldr q11, [x4, #720]",
|
||||
"ldr q12, [x4, #736]",
|
||||
"ldr q13, [x4, #752]",
|
||||
"ldr q14, [x4, #768]",
|
||||
"ldr q15, [x4, #784]",
|
||||
"b #+0x58",
|
||||
"ldp q2, q3, [x4, #576]",
|
||||
"ldp q4, q5, [x4, #608]",
|
||||
"ldp q6, q7, [x4, #640]",
|
||||
"ldp q8, q9, [x4, #672]",
|
||||
"ldp q10, q11, [x4, #704]",
|
||||
"ldp q12, q13, [x4, #736]",
|
||||
"ldp q14, q15, [x4, #768]",
|
||||
"str q2, [sp]",
|
||||
"ldr q2, [x4, #800]",
|
||||
"str q3, [sp, #32]",
|
||||
"ldr q3, [x4, #816]",
|
||||
"str q3, [x28, #256]",
|
||||
"str q2, [x28, #240]",
|
||||
"str q15, [x28, #224]",
|
||||
"str q14, [x28, #208]",
|
||||
"str q13, [x28, #192]",
|
||||
"str q12, [x28, #176]",
|
||||
"str q11, [x28, #160]",
|
||||
"str q10, [x28, #144]",
|
||||
"str q9, [x28, #128]",
|
||||
"str q8, [x28, #112]",
|
||||
"str q7, [x28, #96]",
|
||||
"str q6, [x28, #80]",
|
||||
"str q5, [x28, #64]",
|
||||
"str q4, [x28, #48]",
|
||||
"ldr q2, [sp, #32]",
|
||||
"str q2, [x28, #32]",
|
||||
"ldp q2, q3, [x4, #800]",
|
||||
"stp q2, q3, [x28, #240]",
|
||||
"stp q14, q15, [x28, #208]",
|
||||
"stp q12, q13, [x28, #176]",
|
||||
"stp q10, q11, [x28, #144]",
|
||||
"stp q8, q9, [x28, #112]",
|
||||
"stp q6, q7, [x28, #80]",
|
||||
"stp q4, q5, [x28, #48]",
|
||||
"ldr q2, [sp]",
|
||||
"str q2, [x28, #16]",
|
||||
"b #+0x48",
|
||||
"ldr q3, [sp, #32]",
|
||||
"stp q2, q3, [x28, #16]",
|
||||
"b #+0x28",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #256]",
|
||||
"str q2, [x28, #240]",
|
||||
"str q2, [x28, #224]",
|
||||
"str q2, [x28, #208]",
|
||||
"str q2, [x28, #192]",
|
||||
"str q2, [x28, #176]",
|
||||
"str q2, [x28, #160]",
|
||||
"str q2, [x28, #144]",
|
||||
"str q2, [x28, #128]",
|
||||
"str q2, [x28, #112]",
|
||||
"str q2, [x28, #96]",
|
||||
"str q2, [x28, #80]",
|
||||
"str q2, [x28, #64]",
|
||||
"str q2, [x28, #48]",
|
||||
"str q2, [x28, #32]",
|
||||
"str q2, [x28, #16]",
|
||||
"stp q2, q2, [x28, #240]",
|
||||
"stp q2, q2, [x28, #208]",
|
||||
"stp q2, q2, [x28, #176]",
|
||||
"stp q2, q2, [x28, #144]",
|
||||
"stp q2, q2, [x28, #112]",
|
||||
"stp q2, q2, [x28, #80]",
|
||||
"stp q2, q2, [x28, #48]",
|
||||
"stp q2, q2, [x28, #16]",
|
||||
"ldr x20, [x4, #512]",
|
||||
"ubfx x20, x20, #1, #2",
|
||||
"cbnz x20, #+0x8",
|
||||
|
Loading…
x
Reference in New Issue
Block a user