mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-12-15 09:59:28 +00:00
Merge pull request #3542 from alyssarosenzweig/ra/rep
Eliminate xblock liveness with rep cmp/lod/scas
This commit is contained in:
commit
e8abc88702
@ -402,6 +402,24 @@ DEF_OP(CondAddNZCV) {
|
||||
}
|
||||
}
|
||||
|
||||
// Conditional compare. If Cond holds, NZCV is set from the difference of
// Src1 and Src2; otherwise NZCV is forced to the constant in FalseNZCV.
// Emitted as a single ccmp, using the immediate form when Src2 is an
// inline constant and the register form otherwise.
DEF_OP(CondSubNZCV) {
  auto Op = IROp->C<IR::IROp_CondSubNZCV>();
  const auto OpSize = IROp->Size;

  // Only 32-bit and 64-bit operations are supported.
  LOGMAN_THROW_AA_FMT(OpSize == IR::i32Bit || OpSize == IR::i64Bit, "Unsupported {} size: {}", __func__, OpSize);
  const auto EmitSize = OpSize == IR::i64Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;

  // Flags written when the condition does not hold.
  const auto FalseFlags = static_cast<ARMEmitter::StatusFlags>(Op->FalseNZCV);

  const auto Lhs = GetZeroableReg(Op->Src1);
  const auto Cond = MapSelectCC(Op->Cond);

  uint64_t Imm = 0;
  if (!IsInlineConstant(Op->Src2, &Imm)) {
    // Register operand.
    ccmp(EmitSize, Lhs, GetReg(Op->Src2.ID()), FalseFlags, Cond);
  } else {
    // Src2 was inlined as a constant: use the immediate encoding.
    ccmp(EmitSize, Lhs, Imm, FalseFlags, Cond);
  }
}
|
||||
|
||||
DEF_OP(Neg) {
|
||||
auto Op = IROp->C<IR::IROp_Neg>();
|
||||
const uint8_t OpSize = IROp->Size;
|
||||
|
@ -3548,75 +3548,90 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
|
||||
|
||||
bool REPE = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX;
|
||||
|
||||
// read DF once
|
||||
auto PtrDir = LoadDir(Size);
|
||||
|
||||
auto JumpStart = Jump();
|
||||
// Make sure to start a new block after ending this one
|
||||
auto LoopStart = CreateNewCodeBlockAfter(GetCurrentBlock());
|
||||
SetJumpTarget(JumpStart, LoopStart);
|
||||
SetCurrentCodeBlock(LoopStart);
|
||||
StartNewBlock();
|
||||
|
||||
// If rcx = 0, skip the whole loop.
|
||||
OrderedNode *Counter = LoadGPRRegister(X86State::REG_RCX);
|
||||
auto OuterJump = CondJump(Counter, {COND_EQ});
|
||||
|
||||
// Can we end the block?
|
||||
auto CondJump_ = CondJump(Counter, {COND_EQ});
|
||||
IRPair<IROp_CondJump> InternalCondJump;
|
||||
|
||||
auto LoopTail = CreateNewCodeBlockAfter(LoopStart);
|
||||
SetFalseJumpTarget(CondJump_, LoopTail);
|
||||
SetCurrentCodeBlock(LoopTail);
|
||||
auto BeforeLoop = CreateNewCodeBlockAfter(GetCurrentBlock());
|
||||
SetFalseJumpTarget(OuterJump, BeforeLoop);
|
||||
SetCurrentCodeBlock(BeforeLoop);
|
||||
StartNewBlock();
|
||||
|
||||
// Working loop
|
||||
{
|
||||
OrderedNode *Dest_RSI = LoadGPRRegister(X86State::REG_RSI);
|
||||
OrderedNode *Dest_RDI = LoadGPRRegister(X86State::REG_RDI);
|
||||
ForeachDirection([this, Op, Size, REPE](int PtrDir) {
|
||||
IRPair<IROp_CondJump> InnerJump;
|
||||
auto JumpIntoLoop = Jump();
|
||||
|
||||
// Only ES prefix
|
||||
Dest_RDI = AppendSegmentOffset(Dest_RDI, 0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);
|
||||
// Default DS prefix
|
||||
Dest_RSI = AppendSegmentOffset(Dest_RSI, Op->Flags, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX);
|
||||
// Setup for the loop
|
||||
auto LoopHeader = CreateNewCodeBlockAfter(GetCurrentBlock());
|
||||
SetCurrentCodeBlock(LoopHeader);
|
||||
StartNewBlock();
|
||||
SetJumpTarget(JumpIntoLoop, LoopHeader);
|
||||
|
||||
auto Src1 = _LoadMemAutoTSO(GPRClass, Size, Dest_RDI, Size);
|
||||
auto Src2 = _LoadMem(GPRClass, Size, Dest_RSI, Size);
|
||||
// Working loop
|
||||
{
|
||||
OrderedNode *Dest_RSI = LoadGPRRegister(X86State::REG_RSI);
|
||||
OrderedNode *Dest_RDI = LoadGPRRegister(X86State::REG_RDI);
|
||||
|
||||
GenerateFlags_SUB(Op, Src2, Src1);
|
||||
// Only ES prefix
|
||||
Dest_RDI = AppendSegmentOffset(Dest_RDI, 0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);
|
||||
// Default DS prefix
|
||||
Dest_RSI = AppendSegmentOffset(Dest_RSI, Op->Flags, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX);
|
||||
|
||||
// Calculate flags early.
|
||||
CalculateDeferredFlags();
|
||||
auto Src1 = _LoadMemAutoTSO(GPRClass, Size, Dest_RDI, Size);
|
||||
auto Src2 = _LoadMem(GPRClass, Size, Dest_RSI, Size);
|
||||
|
||||
OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX);
|
||||
// We'll calculate PF/AF after the loop, so use them as temporaries here.
|
||||
_StoreRegister(Src1, false, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
|
||||
_StoreRegister(Src2, false, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
|
||||
|
||||
// Decrement counter
|
||||
TailCounter = _Sub(OpSize::i64Bit, TailCounter, _Constant(1));
|
||||
OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX);
|
||||
|
||||
// Store the counter since we don't have phis
|
||||
StoreGPRRegister(X86State::REG_RCX, TailCounter);
|
||||
// Decrement counter
|
||||
TailCounter = _SubWithFlags(OpSize::i64Bit, TailCounter, _Constant(1));
|
||||
|
||||
// Offset the pointer
|
||||
Dest_RDI = _Add(OpSize::i64Bit, Dest_RDI, PtrDir);
|
||||
StoreGPRRegister(X86State::REG_RDI, Dest_RDI);
|
||||
// Store the counter since we don't have phis
|
||||
StoreGPRRegister(X86State::REG_RCX, TailCounter);
|
||||
|
||||
// Offset second pointer
|
||||
Dest_RSI = _Add(OpSize::i64Bit, Dest_RSI, PtrDir);
|
||||
StoreGPRRegister(X86State::REG_RSI, Dest_RSI);
|
||||
// Offset the pointer
|
||||
Dest_RDI = _Add(OpSize::i64Bit, Dest_RDI, _Constant(PtrDir * Size));
|
||||
StoreGPRRegister(X86State::REG_RDI, Dest_RDI);
|
||||
|
||||
CalculateDeferredFlags();
|
||||
InternalCondJump = CondJumpNZCV({REPE ? COND_EQ : COND_NEQ});
|
||||
// Offset second pointer
|
||||
Dest_RSI = _Add(OpSize::i64Bit, Dest_RSI, _Constant(PtrDir * Size));
|
||||
StoreGPRRegister(X86State::REG_RSI, Dest_RSI);
|
||||
|
||||
// Jump back to the start if we have more work to do
|
||||
SetTrueJumpTarget(InternalCondJump, LoopStart);
|
||||
}
|
||||
// If TailCounter != 0, compare sources.
|
||||
// If TailCounter == 0, set ZF iff that would break.
|
||||
_CondSubNZCV(OpSize::i64Bit, Src2, Src1, {COND_NEQ}, REPE ? 0 : (1 << 2) /* Z */);
|
||||
CachedNZCV = nullptr;
|
||||
NZCVDirty = false;
|
||||
InnerJump = CondJumpNZCV({REPE ? COND_EQ : COND_NEQ});
|
||||
|
||||
// Jump back to the start if we have more work to do
|
||||
SetTrueJumpTarget(InnerJump, LoopHeader);
|
||||
}
|
||||
|
||||
// Make sure to start a new block after ending this one
|
||||
auto LoopEnd = CreateNewCodeBlockAfter(GetCurrentBlock());
|
||||
SetFalseJumpTarget(InnerJump, LoopEnd);
|
||||
SetCurrentCodeBlock(LoopEnd);
|
||||
StartNewBlock();
|
||||
});
|
||||
|
||||
// Make sure to start a new block after ending this one
|
||||
auto LoopEnd = CreateNewCodeBlockAfter(LoopTail);
|
||||
SetTrueJumpTarget(CondJump_, LoopEnd);
|
||||
{
|
||||
// Grab the sources from the last iteration so we can set flags.
|
||||
auto Src1 = _LoadRegister(false, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
|
||||
auto Src2 = _LoadRegister(false, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
|
||||
GenerateFlags_SUB(Op, Src2, Src1);
|
||||
CalculateDeferredFlags();
|
||||
}
|
||||
auto Jump_ = Jump();
|
||||
|
||||
SetFalseJumpTarget(InternalCondJump, LoopEnd);
|
||||
|
||||
SetCurrentCodeBlock(LoopEnd);
|
||||
auto Exit = CreateNewCodeBlockAfter(GetCurrentBlock());
|
||||
SetJumpTarget(Jump_, Exit);
|
||||
SetTrueJumpTarget(OuterJump, Exit);
|
||||
SetCurrentCodeBlock(Exit);
|
||||
StartNewBlock();
|
||||
}
|
||||
}
|
||||
@ -3647,65 +3662,64 @@ void OpDispatchBuilder::LODSOp(OpcodeArgs) {
|
||||
// Calculate flags early. because end of block
|
||||
CalculateDeferredFlags();
|
||||
|
||||
// XXX: Theoretically LODS could be optimized to
|
||||
// RSI += {-}(RCX * Size)
|
||||
// RAX = [RSI - Size]
|
||||
// But this might violate the case of an application scanning pages for read permission and catching the fault
|
||||
// May or may not matter
|
||||
ForeachDirection([this, Op, Size](int PtrDir) {
|
||||
// XXX: Theoretically LODS could be optimized to
|
||||
// RSI += {-}(RCX * Size)
|
||||
// RAX = [RSI - Size]
|
||||
// But this might violate the case of an application scanning pages for read permission and catching the fault
|
||||
// May or may not matter
|
||||
|
||||
// Read DF once
|
||||
auto PtrDir = LoadDir(Size);
|
||||
auto JumpStart = Jump();
|
||||
// Make sure to start a new block after ending this one
|
||||
auto LoopStart = CreateNewCodeBlockAfter(GetCurrentBlock());
|
||||
SetJumpTarget(JumpStart, LoopStart);
|
||||
SetCurrentCodeBlock(LoopStart);
|
||||
StartNewBlock();
|
||||
|
||||
auto JumpStart = Jump();
|
||||
// Make sure to start a new block after ending this one
|
||||
auto LoopStart = CreateNewCodeBlockAfter(GetCurrentBlock());
|
||||
SetJumpTarget(JumpStart, LoopStart);
|
||||
SetCurrentCodeBlock(LoopStart);
|
||||
StartNewBlock();
|
||||
OrderedNode *Counter = LoadGPRRegister(X86State::REG_RCX);
|
||||
|
||||
OrderedNode *Counter = LoadGPRRegister(X86State::REG_RCX);
|
||||
// Can we end the block?
|
||||
|
||||
// Can we end the block?
|
||||
// We leave if RCX = 0
|
||||
auto CondJump_ = CondJump(Counter, {COND_EQ});
|
||||
|
||||
// We leave if RCX = 0
|
||||
auto CondJump_ = CondJump(Counter, {COND_EQ});
|
||||
auto LoopTail = CreateNewCodeBlockAfter(LoopStart);
|
||||
SetFalseJumpTarget(CondJump_, LoopTail);
|
||||
SetCurrentCodeBlock(LoopTail);
|
||||
StartNewBlock();
|
||||
|
||||
auto LoopTail = CreateNewCodeBlockAfter(LoopStart);
|
||||
SetFalseJumpTarget(CondJump_, LoopTail);
|
||||
SetCurrentCodeBlock(LoopTail);
|
||||
StartNewBlock();
|
||||
// Working loop
|
||||
{
|
||||
OrderedNode *Dest_RSI = LoadGPRRegister(X86State::REG_RSI);
|
||||
|
||||
// Working loop
|
||||
{
|
||||
OrderedNode *Dest_RSI = LoadGPRRegister(X86State::REG_RSI);
|
||||
Dest_RSI = AppendSegmentOffset(Dest_RSI, Op->Flags, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX);
|
||||
|
||||
Dest_RSI = AppendSegmentOffset(Dest_RSI, Op->Flags, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX);
|
||||
auto Src = _LoadMemAutoTSO(GPRClass, Size, Dest_RSI, Size);
|
||||
|
||||
auto Src = _LoadMemAutoTSO(GPRClass, Size, Dest_RSI, Size);
|
||||
StoreResult(GPRClass, Op, Src, -1);
|
||||
|
||||
StoreResult(GPRClass, Op, Src, -1);
|
||||
OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX);
|
||||
OrderedNode *TailDest_RSI = LoadGPRRegister(X86State::REG_RSI);
|
||||
|
||||
OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX);
|
||||
OrderedNode *TailDest_RSI = LoadGPRRegister(X86State::REG_RSI);
|
||||
// Decrement counter
|
||||
TailCounter = _Sub(OpSize::i64Bit, TailCounter, _Constant(1));
|
||||
|
||||
// Decrement counter
|
||||
TailCounter = _Sub(OpSize::i64Bit, TailCounter, _Constant(1));
|
||||
// Store the counter since we don't have phis
|
||||
StoreGPRRegister(X86State::REG_RCX, TailCounter);
|
||||
|
||||
// Store the counter since we don't have phis
|
||||
StoreGPRRegister(X86State::REG_RCX, TailCounter);
|
||||
// Offset the pointer
|
||||
TailDest_RSI = _Add(OpSize::i64Bit, TailDest_RSI, _Constant(PtrDir * Size));
|
||||
StoreGPRRegister(X86State::REG_RSI, TailDest_RSI);
|
||||
|
||||
// Offset the pointer
|
||||
TailDest_RSI = _Add(OpSize::i64Bit, TailDest_RSI, PtrDir);
|
||||
StoreGPRRegister(X86State::REG_RSI, TailDest_RSI);
|
||||
|
||||
// Jump back to the start, we have more work to do
|
||||
Jump(LoopStart);
|
||||
}
|
||||
// Make sure to start a new block after ending this one
|
||||
auto LoopEnd = CreateNewCodeBlockAfter(LoopTail);
|
||||
SetTrueJumpTarget(CondJump_, LoopEnd);
|
||||
SetCurrentCodeBlock(LoopEnd);
|
||||
StartNewBlock();
|
||||
// Jump back to the start, we have more work to do
|
||||
Jump(LoopStart);
|
||||
}
|
||||
// Make sure to start a new block after ending this one
|
||||
auto LoopEnd = CreateNewCodeBlockAfter(LoopTail);
|
||||
SetTrueJumpTarget(CondJump_, LoopEnd);
|
||||
SetCurrentCodeBlock(LoopEnd);
|
||||
StartNewBlock();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@ -3736,71 +3750,70 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) {
|
||||
// Calculate flags early. because end of block
|
||||
CalculateDeferredFlags();
|
||||
|
||||
bool REPE = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX;
|
||||
ForeachDirection([this, Op, Size](int Dir){
|
||||
bool REPE = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX;
|
||||
|
||||
// read DF once
|
||||
auto PtrDir = LoadDir(Size);
|
||||
auto JumpStart = Jump();
|
||||
// Make sure to start a new block after ending this one
|
||||
auto LoopStart = CreateNewCodeBlockAfter(GetCurrentBlock());
|
||||
SetJumpTarget(JumpStart, LoopStart);
|
||||
SetCurrentCodeBlock(LoopStart);
|
||||
StartNewBlock();
|
||||
|
||||
auto JumpStart = Jump();
|
||||
// Make sure to start a new block after ending this one
|
||||
auto LoopStart = CreateNewCodeBlockAfter(GetCurrentBlock());
|
||||
SetJumpTarget(JumpStart, LoopStart);
|
||||
SetCurrentCodeBlock(LoopStart);
|
||||
StartNewBlock();
|
||||
OrderedNode *Counter = LoadGPRRegister(X86State::REG_RCX);
|
||||
|
||||
OrderedNode *Counter = LoadGPRRegister(X86State::REG_RCX);
|
||||
// Can we end the block?
|
||||
// We leave if RCX = 0
|
||||
auto CondJump_ = CondJump(Counter, {COND_EQ});
|
||||
IRPair<IROp_CondJump> InternalCondJump;
|
||||
|
||||
// Can we end the block?
|
||||
// We leave if RCX = 0
|
||||
auto CondJump_ = CondJump(Counter, {COND_EQ});
|
||||
IRPair<IROp_CondJump> InternalCondJump;
|
||||
auto LoopTail = CreateNewCodeBlockAfter(LoopStart);
|
||||
SetFalseJumpTarget(CondJump_, LoopTail);
|
||||
SetCurrentCodeBlock(LoopTail);
|
||||
StartNewBlock();
|
||||
|
||||
auto LoopTail = CreateNewCodeBlockAfter(LoopStart);
|
||||
SetFalseJumpTarget(CondJump_, LoopTail);
|
||||
SetCurrentCodeBlock(LoopTail);
|
||||
StartNewBlock();
|
||||
// Working loop
|
||||
{
|
||||
OrderedNode *Dest_RDI = LoadGPRRegister(X86State::REG_RDI);
|
||||
|
||||
// Working loop
|
||||
{
|
||||
OrderedNode *Dest_RDI = LoadGPRRegister(X86State::REG_RDI);
|
||||
Dest_RDI = AppendSegmentOffset(Dest_RDI, 0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);
|
||||
|
||||
Dest_RDI = AppendSegmentOffset(Dest_RDI, 0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);
|
||||
auto Src1 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
|
||||
auto Src2 = _LoadMemAutoTSO(GPRClass, Size, Dest_RDI, Size);
|
||||
|
||||
auto Src1 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
|
||||
auto Src2 = _LoadMemAutoTSO(GPRClass, Size, Dest_RDI, Size);
|
||||
GenerateFlags_SUB(Op, Src1, Src2);
|
||||
|
||||
GenerateFlags_SUB(Op, Src1, Src2);
|
||||
// Calculate flags early.
|
||||
CalculateDeferredFlags();
|
||||
|
||||
// Calculate flags early.
|
||||
CalculateDeferredFlags();
|
||||
OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX);
|
||||
OrderedNode *TailDest_RDI = LoadGPRRegister(X86State::REG_RDI);
|
||||
|
||||
OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX);
|
||||
OrderedNode *TailDest_RDI = LoadGPRRegister(X86State::REG_RDI);
|
||||
// Decrement counter
|
||||
TailCounter = _Sub(OpSize::i64Bit, TailCounter, _Constant(1));
|
||||
|
||||
// Decrement counter
|
||||
TailCounter = _Sub(OpSize::i64Bit, TailCounter, _Constant(1));
|
||||
// Store the counter since we don't have phis
|
||||
StoreGPRRegister(X86State::REG_RCX, TailCounter);
|
||||
|
||||
// Store the counter since we don't have phis
|
||||
StoreGPRRegister(X86State::REG_RCX, TailCounter);
|
||||
// Offset the pointer
|
||||
TailDest_RDI = _Add(OpSize::i64Bit, TailDest_RDI, _Constant(Dir * Size));
|
||||
StoreGPRRegister(X86State::REG_RDI, TailDest_RDI);
|
||||
|
||||
// Offset the pointer
|
||||
TailDest_RDI = _Add(OpSize::i64Bit, TailDest_RDI, PtrDir);
|
||||
StoreGPRRegister(X86State::REG_RDI, TailDest_RDI);
|
||||
CalculateDeferredFlags();
|
||||
InternalCondJump = CondJumpNZCV({REPE ? COND_EQ : COND_NEQ});
|
||||
|
||||
CalculateDeferredFlags();
|
||||
InternalCondJump = CondJumpNZCV({REPE ? COND_EQ : COND_NEQ});
|
||||
// Jump back to the start if we have more work to do
|
||||
SetTrueJumpTarget(InternalCondJump, LoopStart);
|
||||
}
|
||||
// Make sure to start a new block after ending this one
|
||||
auto LoopEnd = CreateNewCodeBlockAfter(LoopTail);
|
||||
SetTrueJumpTarget(CondJump_, LoopEnd);
|
||||
|
||||
// Jump back to the start if we have more work to do
|
||||
SetTrueJumpTarget(InternalCondJump, LoopStart);
|
||||
}
|
||||
// Make sure to start a new block after ending this one
|
||||
auto LoopEnd = CreateNewCodeBlockAfter(LoopTail);
|
||||
SetTrueJumpTarget(CondJump_, LoopEnd);
|
||||
SetFalseJumpTarget(InternalCondJump, LoopEnd);
|
||||
|
||||
SetFalseJumpTarget(InternalCondJump, LoopEnd);
|
||||
|
||||
SetCurrentCodeBlock(LoopEnd);
|
||||
StartNewBlock();
|
||||
SetCurrentCodeBlock(LoopEnd);
|
||||
StartNewBlock();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -223,6 +223,32 @@ public:
|
||||
return CanHaveSideEffects;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void ForeachDirection(F&& Routine) {
|
||||
// Otherwise, prepare to branch.
|
||||
auto Zero = _Constant(0);
|
||||
|
||||
// If the shift is zero, do not touch the flags.
|
||||
auto ForwardBlock = CreateNewCodeBlockAfter(GetCurrentBlock());
|
||||
auto BackwardBlock = CreateNewCodeBlockAfter(ForwardBlock);
|
||||
auto ExitBlock = CreateNewCodeBlockAfter(BackwardBlock);
|
||||
|
||||
auto DF = GetRFLAG(X86State::RFLAG_DF_RAW_LOC);
|
||||
CondJump(DF, Zero, ForwardBlock, BackwardBlock, {COND_EQ});
|
||||
|
||||
for (auto D = 0; D < 2; ++D) {
|
||||
SetCurrentCodeBlock(D ? BackwardBlock : ForwardBlock);
|
||||
StartNewBlock();
|
||||
{
|
||||
Routine(D ? -1 : 1);
|
||||
Jump(ExitBlock);
|
||||
}
|
||||
}
|
||||
|
||||
SetCurrentCodeBlock(ExitBlock);
|
||||
StartNewBlock();
|
||||
}
|
||||
|
||||
OpDispatchBuilder(FEXCore::Context::ContextImpl *ctx);
|
||||
OpDispatchBuilder(FEXCore::Utils::IntrusivePooledAllocator &Allocator);
|
||||
|
||||
|
@ -1035,6 +1035,14 @@
|
||||
"Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
|
||||
]
|
||||
},
|
||||
"CondSubNZCV OpSize:#Size, GPR:$Src1, GPR:$Src2, CondClass:$Cond, u8:$FalseNZCV": {
|
||||
"Desc": ["If condition is true, set NZCV per difference of GPRs, else force NZCV to a constant."],
|
||||
"HasSideEffects": true,
|
||||
"DestSize": "Size",
|
||||
"EmitValidation": [
|
||||
"Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
|
||||
]
|
||||
},
|
||||
"GPR = AdcWithFlags OpSize:#Size, GPR:$Src1, GPR:$Src2": {
|
||||
"Desc": ["Adds and set NZCV for the sum of two GPRs and carry-in given as NZCV"],
|
||||
"HasSideEffects": true,
|
||||
|
@ -1164,6 +1164,7 @@ bool ConstProp::ConstantInlining(IREmitter *IREmit, const IRListView& CurrentIR)
|
||||
break;
|
||||
}
|
||||
case OP_CONDADDNZCV:
|
||||
case OP_CONDSUBNZCV:
|
||||
{
|
||||
auto Op = IROp->C<IR::IROp_CondAddNZCV>();
|
||||
|
||||
|
@ -227,6 +227,7 @@ DeadFlagCalculationEliminination::Classify(IROp_Header *IROp)
|
||||
return {.Read = FlagsForCondClassType(Op->Cond)};
|
||||
}
|
||||
|
||||
case OP_CONDSUBNZCV:
|
||||
case OP_CONDADDNZCV: {
|
||||
auto Op = IROp->CW<IR::IROp_CondAddNZCV>();
|
||||
return {
|
||||
|
@ -1950,153 +1950,251 @@
|
||||
]
|
||||
},
|
||||
"repz cmpsb": {
|
||||
"ExpectedInstructionCount": 13,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": "0xa6",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x68",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"cbz x5, #+0x30",
|
||||
"ldrb w21, [x11]",
|
||||
"ldrb w22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"lsl w0, w22, #24",
|
||||
"cmp w0, w21, lsl #24",
|
||||
"sub w26, w22, w21",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.eq #-0x2c"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldrb w26, [x11]",
|
||||
"ldrb w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x1 (1)",
|
||||
"add x10, x10, #0x1 (1)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"b #+0x20",
|
||||
"ldrb w26, [x11]",
|
||||
"ldrb w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x1 (1)",
|
||||
"sub x10, x10, #0x1 (1)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"lsl w0, w20, #24",
|
||||
"cmp w0, w26, lsl #24",
|
||||
"sub w26, w20, w26",
|
||||
"cfinv"
|
||||
]
|
||||
},
|
||||
"repz cmpsw": {
|
||||
"ExpectedInstructionCount": 14,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": "0xa7",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x68",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #1",
|
||||
"cbz x5, #+0x30",
|
||||
"ldrh w21, [x11]",
|
||||
"ldrh w22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"lsl w0, w22, #16",
|
||||
"cmp w0, w21, lsl #16",
|
||||
"sub w26, w22, w21",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.eq #-0x2c"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldrh w26, [x11]",
|
||||
"ldrh w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x2 (2)",
|
||||
"add x10, x10, #0x2 (2)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"b #+0x20",
|
||||
"ldrh w26, [x11]",
|
||||
"ldrh w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x2 (2)",
|
||||
"sub x10, x10, #0x2 (2)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"lsl w0, w20, #16",
|
||||
"cmp w0, w26, lsl #16",
|
||||
"sub w26, w20, w26",
|
||||
"cfinv"
|
||||
]
|
||||
},
|
||||
"repz cmpsd": {
|
||||
"ExpectedInstructionCount": 12,
|
||||
"ExpectedInstructionCount": 24,
|
||||
"Comment": "0xa7",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x60",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #2",
|
||||
"cbz x5, #+0x28",
|
||||
"ldr w21, [x11]",
|
||||
"ldr w22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"subs w26, w22, w21",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.eq #-0x24"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldr w26, [x11]",
|
||||
"ldr w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x4 (4)",
|
||||
"add x10, x10, #0x4 (4)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"b #+0x20",
|
||||
"ldr w26, [x11]",
|
||||
"ldr w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x4 (4)",
|
||||
"sub x10, x10, #0x4 (4)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"subs w26, w20, w26",
|
||||
"cfinv"
|
||||
]
|
||||
},
|
||||
"repz cmpsq": {
|
||||
"ExpectedInstructionCount": 12,
|
||||
"ExpectedInstructionCount": 24,
|
||||
"Comment": "0xa7",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x60",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #3",
|
||||
"cbz x5, #+0x28",
|
||||
"ldr x21, [x11]",
|
||||
"ldr x22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"subs x26, x22, x21",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.eq #-0x24"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldr x26, [x11]",
|
||||
"ldr x27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x8 (8)",
|
||||
"add x10, x10, #0x8 (8)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"b #+0x20",
|
||||
"ldr x26, [x11]",
|
||||
"ldr x27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x8 (8)",
|
||||
"sub x10, x10, #0x8 (8)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"subs x26, x20, x26",
|
||||
"cfinv"
|
||||
]
|
||||
},
|
||||
"repnz cmpsb": {
|
||||
"ExpectedInstructionCount": 13,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": "0xa6",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x68",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"cbz x5, #+0x30",
|
||||
"ldrb w21, [x11]",
|
||||
"ldrb w22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"lsl w0, w22, #24",
|
||||
"cmp w0, w21, lsl #24",
|
||||
"sub w26, w22, w21",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.ne #-0x2c"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldrb w26, [x11]",
|
||||
"ldrb w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x1 (1)",
|
||||
"add x10, x10, #0x1 (1)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"b #+0x20",
|
||||
"ldrb w26, [x11]",
|
||||
"ldrb w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x1 (1)",
|
||||
"sub x10, x10, #0x1 (1)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"lsl w0, w20, #24",
|
||||
"cmp w0, w26, lsl #24",
|
||||
"sub w26, w20, w26",
|
||||
"cfinv"
|
||||
]
|
||||
},
|
||||
"repnz cmpsw": {
|
||||
"ExpectedInstructionCount": 14,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": "0xa7",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x68",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #1",
|
||||
"cbz x5, #+0x30",
|
||||
"ldrh w21, [x11]",
|
||||
"ldrh w22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"lsl w0, w22, #16",
|
||||
"cmp w0, w21, lsl #16",
|
||||
"sub w26, w22, w21",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.ne #-0x2c"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldrh w26, [x11]",
|
||||
"ldrh w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x2 (2)",
|
||||
"add x10, x10, #0x2 (2)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"b #+0x20",
|
||||
"ldrh w26, [x11]",
|
||||
"ldrh w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x2 (2)",
|
||||
"sub x10, x10, #0x2 (2)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"lsl w0, w20, #16",
|
||||
"cmp w0, w26, lsl #16",
|
||||
"sub w26, w20, w26",
|
||||
"cfinv"
|
||||
]
|
||||
},
|
||||
"repnz cmpsd": {
|
||||
"ExpectedInstructionCount": 12,
|
||||
"ExpectedInstructionCount": 24,
|
||||
"Comment": "0xa7",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x60",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #2",
|
||||
"cbz x5, #+0x28",
|
||||
"ldr w21, [x11]",
|
||||
"ldr w22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"subs w26, w22, w21",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.ne #-0x24"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldr w26, [x11]",
|
||||
"ldr w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x4 (4)",
|
||||
"add x10, x10, #0x4 (4)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"b #+0x20",
|
||||
"ldr w26, [x11]",
|
||||
"ldr w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x4 (4)",
|
||||
"sub x10, x10, #0x4 (4)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"subs w26, w20, w26",
|
||||
"cfinv"
|
||||
]
|
||||
},
|
||||
"repnz cmpsq": {
|
||||
"ExpectedInstructionCount": 12,
|
||||
"ExpectedInstructionCount": 24,
|
||||
"Comment": "0xa7",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x60",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #3",
|
||||
"cbz x5, #+0x28",
|
||||
"ldr x21, [x11]",
|
||||
"ldr x22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"subs x26, x22, x21",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.ne #-0x24"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldr x26, [x11]",
|
||||
"ldr x27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x8 (8)",
|
||||
"add x10, x10, #0x8 (8)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"b #+0x20",
|
||||
"ldr x26, [x11]",
|
||||
"ldr x27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x8 (8)",
|
||||
"sub x10, x10, #0x8 (8)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"subs x26, x20, x26",
|
||||
"cfinv"
|
||||
]
|
||||
},
|
||||
"test al, 1": {
|
||||
@ -2212,136 +2310,234 @@
|
||||
]
|
||||
},
|
||||
"repz scasb": {
|
||||
"ExpectedInstructionCount": 11,
|
||||
"ExpectedInstructionCount": 25,
|
||||
"Comment": "0xae",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x30",
|
||||
"cbz x5, #+0x28",
|
||||
"ldrb w21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"ldrb w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #24",
|
||||
"cmp w0, w21, lsl #24",
|
||||
"sub w26, w4, w21",
|
||||
"cmp w0, w20, lsl #24",
|
||||
"sub w26, w4, w20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x1 (1)",
|
||||
"b.eq #-0x24",
|
||||
"b #+0x2c",
|
||||
"cbz x5, #+0x28",
|
||||
"ldrb w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #24",
|
||||
"cmp w0, w20, lsl #24",
|
||||
"sub w26, w4, w20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x1 (1)",
|
||||
"b.eq #-0x24"
|
||||
]
|
||||
},
|
||||
"repz scasw": {
|
||||
"ExpectedInstructionCount": 12,
|
||||
"ExpectedInstructionCount": 25,
|
||||
"Comment": "0xaf",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #1",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x30",
|
||||
"cbz x5, #+0x28",
|
||||
"ldrh w21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"ldrh w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #16",
|
||||
"cmp w0, w21, lsl #16",
|
||||
"sub w26, w4, w21",
|
||||
"cmp w0, w20, lsl #16",
|
||||
"sub w26, w4, w20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x2 (2)",
|
||||
"b.eq #-0x24",
|
||||
"b #+0x2c",
|
||||
"cbz x5, #+0x28",
|
||||
"ldrh w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #16",
|
||||
"cmp w0, w20, lsl #16",
|
||||
"sub w26, w4, w20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x2 (2)",
|
||||
"b.eq #-0x24"
|
||||
]
|
||||
},
|
||||
"repz scasd": {
|
||||
"ExpectedInstructionCount": 10,
|
||||
"ExpectedInstructionCount": 21,
|
||||
"Comment": "0xaf",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #2",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x28",
|
||||
"cbz x5, #+0x20",
|
||||
"ldr w21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"subs w26, w4, w21",
|
||||
"ldr w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs w26, w4, w20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x4 (4)",
|
||||
"b.eq #-0x1c",
|
||||
"b #+0x24",
|
||||
"cbz x5, #+0x20",
|
||||
"ldr w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs w26, w4, w20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x4 (4)",
|
||||
"b.eq #-0x1c"
|
||||
]
|
||||
},
|
||||
"repz scasq": {
|
||||
"ExpectedInstructionCount": 10,
|
||||
"ExpectedInstructionCount": 21,
|
||||
"Comment": "0xaf",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #3",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x28",
|
||||
"cbz x5, #+0x20",
|
||||
"ldr x21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"subs x26, x4, x21",
|
||||
"ldr x20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs x26, x4, x20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x8 (8)",
|
||||
"b.eq #-0x1c",
|
||||
"b #+0x24",
|
||||
"cbz x5, #+0x20",
|
||||
"ldr x20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs x26, x4, x20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x8 (8)",
|
||||
"b.eq #-0x1c"
|
||||
]
|
||||
},
|
||||
"repnz scasb": {
|
||||
"ExpectedInstructionCount": 11,
|
||||
"ExpectedInstructionCount": 25,
|
||||
"Comment": "0xae",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x30",
|
||||
"cbz x5, #+0x28",
|
||||
"ldrb w21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"ldrb w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #24",
|
||||
"cmp w0, w21, lsl #24",
|
||||
"sub w26, w4, w21",
|
||||
"cmp w0, w20, lsl #24",
|
||||
"sub w26, w4, w20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x1 (1)",
|
||||
"b.ne #-0x24",
|
||||
"b #+0x2c",
|
||||
"cbz x5, #+0x28",
|
||||
"ldrb w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #24",
|
||||
"cmp w0, w20, lsl #24",
|
||||
"sub w26, w4, w20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x1 (1)",
|
||||
"b.ne #-0x24"
|
||||
]
|
||||
},
|
||||
"repnz scasw": {
|
||||
"ExpectedInstructionCount": 12,
|
||||
"ExpectedInstructionCount": 25,
|
||||
"Comment": "0xaf",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #1",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x30",
|
||||
"cbz x5, #+0x28",
|
||||
"ldrh w21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"ldrh w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #16",
|
||||
"cmp w0, w21, lsl #16",
|
||||
"sub w26, w4, w21",
|
||||
"cmp w0, w20, lsl #16",
|
||||
"sub w26, w4, w20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x2 (2)",
|
||||
"b.ne #-0x24",
|
||||
"b #+0x2c",
|
||||
"cbz x5, #+0x28",
|
||||
"ldrh w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #16",
|
||||
"cmp w0, w20, lsl #16",
|
||||
"sub w26, w4, w20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x2 (2)",
|
||||
"b.ne #-0x24"
|
||||
]
|
||||
},
|
||||
"repnz scasd": {
|
||||
"ExpectedInstructionCount": 10,
|
||||
"ExpectedInstructionCount": 21,
|
||||
"Comment": "0xaf",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #2",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x28",
|
||||
"cbz x5, #+0x20",
|
||||
"ldr w21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"subs w26, w4, w21",
|
||||
"ldr w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs w26, w4, w20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x4 (4)",
|
||||
"b.ne #-0x1c",
|
||||
"b #+0x24",
|
||||
"cbz x5, #+0x20",
|
||||
"ldr w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs w26, w4, w20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x4 (4)",
|
||||
"b.ne #-0x1c"
|
||||
]
|
||||
},
|
||||
"repnz scasq": {
|
||||
"ExpectedInstructionCount": 10,
|
||||
"ExpectedInstructionCount": 21,
|
||||
"Comment": "0xaf",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #3",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x28",
|
||||
"cbz x5, #+0x20",
|
||||
"ldr x21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"subs x26, x4, x21",
|
||||
"ldr x20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs x26, x4, x20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x8 (8)",
|
||||
"b.ne #-0x1c",
|
||||
"b #+0x24",
|
||||
"cbz x5, #+0x20",
|
||||
"ldr x20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs x26, x4, x20",
|
||||
"cfinv",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x8 (8)",
|
||||
"b.ne #-0x1c"
|
||||
]
|
||||
},
|
||||
|
@ -3295,169 +3295,267 @@
|
||||
]
|
||||
},
|
||||
"repz cmpsb": {
|
||||
"ExpectedInstructionCount": 15,
|
||||
"ExpectedInstructionCount": 28,
|
||||
"Comment": "0xa6",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x70",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"cbz x5, #+0x38",
|
||||
"ldrb w21, [x11]",
|
||||
"ldrb w22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"lsl w0, w22, #24",
|
||||
"cmp w0, w21, lsl #24",
|
||||
"sub w26, w22, w21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.eq #-0x34"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldrb w26, [x11]",
|
||||
"ldrb w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x1 (1)",
|
||||
"add x10, x10, #0x1 (1)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"b #+0x20",
|
||||
"ldrb w26, [x11]",
|
||||
"ldrb w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x1 (1)",
|
||||
"sub x10, x10, #0x1 (1)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"lsl w0, w20, #24",
|
||||
"cmp w0, w26, lsl #24",
|
||||
"sub w26, w20, w26",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
},
|
||||
"repz cmpsw": {
|
||||
"ExpectedInstructionCount": 16,
|
||||
"ExpectedInstructionCount": 28,
|
||||
"Comment": "0xa7",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x70",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #1",
|
||||
"cbz x5, #+0x38",
|
||||
"ldrh w21, [x11]",
|
||||
"ldrh w22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"lsl w0, w22, #16",
|
||||
"cmp w0, w21, lsl #16",
|
||||
"sub w26, w22, w21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.eq #-0x34"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldrh w26, [x11]",
|
||||
"ldrh w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x2 (2)",
|
||||
"add x10, x10, #0x2 (2)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"b #+0x20",
|
||||
"ldrh w26, [x11]",
|
||||
"ldrh w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x2 (2)",
|
||||
"sub x10, x10, #0x2 (2)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"lsl w0, w20, #16",
|
||||
"cmp w0, w26, lsl #16",
|
||||
"sub w26, w20, w26",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
},
|
||||
"repz cmpsd": {
|
||||
"ExpectedInstructionCount": 14,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": "0xa7",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x68",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #2",
|
||||
"cbz x5, #+0x30",
|
||||
"ldr w21, [x11]",
|
||||
"ldr w22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"subs w26, w22, w21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.eq #-0x2c"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldr w26, [x11]",
|
||||
"ldr w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x4 (4)",
|
||||
"add x10, x10, #0x4 (4)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"b #+0x20",
|
||||
"ldr w26, [x11]",
|
||||
"ldr w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x4 (4)",
|
||||
"sub x10, x10, #0x4 (4)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"subs w26, w20, w26",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
},
|
||||
"repz cmpsq": {
|
||||
"ExpectedInstructionCount": 14,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": "0xa7",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x68",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #3",
|
||||
"cbz x5, #+0x30",
|
||||
"ldr x21, [x11]",
|
||||
"ldr x22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"subs x26, x22, x21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.eq #-0x2c"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldr x26, [x11]",
|
||||
"ldr x27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x8 (8)",
|
||||
"add x10, x10, #0x8 (8)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"b #+0x20",
|
||||
"ldr x26, [x11]",
|
||||
"ldr x27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x8 (8)",
|
||||
"sub x10, x10, #0x8 (8)",
|
||||
"ccmp x27, x26, #nzcv, ne",
|
||||
"b.eq #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"subs x26, x20, x26",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
},
|
||||
"repnz cmpsb": {
|
||||
"ExpectedInstructionCount": 15,
|
||||
"ExpectedInstructionCount": 28,
|
||||
"Comment": "0xa6",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x70",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"cbz x5, #+0x38",
|
||||
"ldrb w21, [x11]",
|
||||
"ldrb w22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"lsl w0, w22, #24",
|
||||
"cmp w0, w21, lsl #24",
|
||||
"sub w26, w22, w21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.ne #-0x34"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldrb w26, [x11]",
|
||||
"ldrb w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x1 (1)",
|
||||
"add x10, x10, #0x1 (1)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"b #+0x20",
|
||||
"ldrb w26, [x11]",
|
||||
"ldrb w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x1 (1)",
|
||||
"sub x10, x10, #0x1 (1)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"lsl w0, w20, #24",
|
||||
"cmp w0, w26, lsl #24",
|
||||
"sub w26, w20, w26",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
},
|
||||
"repnz cmpsw": {
|
||||
"ExpectedInstructionCount": 16,
|
||||
"ExpectedInstructionCount": 28,
|
||||
"Comment": "0xa7",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x70",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #1",
|
||||
"cbz x5, #+0x38",
|
||||
"ldrh w21, [x11]",
|
||||
"ldrh w22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"lsl w0, w22, #16",
|
||||
"cmp w0, w21, lsl #16",
|
||||
"sub w26, w22, w21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.ne #-0x34"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldrh w26, [x11]",
|
||||
"ldrh w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x2 (2)",
|
||||
"add x10, x10, #0x2 (2)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"b #+0x20",
|
||||
"ldrh w26, [x11]",
|
||||
"ldrh w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x2 (2)",
|
||||
"sub x10, x10, #0x2 (2)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"lsl w0, w20, #16",
|
||||
"cmp w0, w26, lsl #16",
|
||||
"sub w26, w20, w26",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
},
|
||||
"repnz cmpsd": {
|
||||
"ExpectedInstructionCount": 14,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": "0xa7",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x68",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #2",
|
||||
"cbz x5, #+0x30",
|
||||
"ldr w21, [x11]",
|
||||
"ldr w22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"subs w26, w22, w21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.ne #-0x2c"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldr w26, [x11]",
|
||||
"ldr w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x4 (4)",
|
||||
"add x10, x10, #0x4 (4)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"b #+0x20",
|
||||
"ldr w26, [x11]",
|
||||
"ldr w27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x4 (4)",
|
||||
"sub x10, x10, #0x4 (4)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"subs w26, w20, w26",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
},
|
||||
"repnz cmpsq": {
|
||||
"ExpectedInstructionCount": 14,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": "0xa7",
|
||||
"ExpectedArm64ASM": [
|
||||
"cbz x5, #+0x68",
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #3",
|
||||
"cbz x5, #+0x30",
|
||||
"ldr x21, [x11]",
|
||||
"ldr x22, [x10]",
|
||||
"eor w27, w22, w21",
|
||||
"subs x26, x22, x21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x10, x10, x20",
|
||||
"b.ne #-0x2c"
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x24",
|
||||
"ldr x26, [x11]",
|
||||
"ldr x27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"add x11, x11, #0x8 (8)",
|
||||
"add x10, x10, #0x8 (8)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"b #+0x20",
|
||||
"ldr x26, [x11]",
|
||||
"ldr x27, [x10]",
|
||||
"subs x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x8 (8)",
|
||||
"sub x10, x10, #0x8 (8)",
|
||||
"ccmp x27, x26, #nZcv, ne",
|
||||
"b.ne #-0x18",
|
||||
"mov x20, x27",
|
||||
"eor w27, w20, w26",
|
||||
"subs x26, x20, x26",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
},
|
||||
"test al, 1": {
|
||||
@ -3842,55 +3940,90 @@
|
||||
]
|
||||
},
|
||||
"rep lodsb": {
|
||||
"ExpectedInstructionCount": 7,
|
||||
"ExpectedInstructionCount": 17,
|
||||
"Comment": "0xac",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x20",
|
||||
"cbz x5, #+0x18",
|
||||
"ldrb w21, [x10]",
|
||||
"bfxil x4, x21, #0, #8",
|
||||
"ldrb w20, [x10]",
|
||||
"bfxil x4, x20, #0, #8",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x10, x10, x20",
|
||||
"add x10, x10, #0x1 (1)",
|
||||
"b #-0x14",
|
||||
"b #+0x1c",
|
||||
"cbz x5, #+0x18",
|
||||
"ldrb w20, [x10]",
|
||||
"bfxil x4, x20, #0, #8",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x10, x10, #0x1 (1)",
|
||||
"b #-0x14"
|
||||
]
|
||||
},
|
||||
"rep lodsw": {
|
||||
"ExpectedInstructionCount": 8,
|
||||
"ExpectedInstructionCount": 17,
|
||||
"Comment": "0xad",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #1",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x20",
|
||||
"cbz x5, #+0x18",
|
||||
"ldrh w21, [x10]",
|
||||
"bfxil x4, x21, #0, #16",
|
||||
"ldrh w20, [x10]",
|
||||
"bfxil x4, x20, #0, #16",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x10, x10, x20",
|
||||
"add x10, x10, #0x2 (2)",
|
||||
"b #-0x14",
|
||||
"b #+0x1c",
|
||||
"cbz x5, #+0x18",
|
||||
"ldrh w20, [x10]",
|
||||
"bfxil x4, x20, #0, #16",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x10, x10, #0x2 (2)",
|
||||
"b #-0x14"
|
||||
]
|
||||
},
|
||||
"rep lodsd": {
|
||||
"ExpectedInstructionCount": 7,
|
||||
"ExpectedInstructionCount": 15,
|
||||
"Comment": "0xad",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #2",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x1c",
|
||||
"cbz x5, #+0x14",
|
||||
"ldr w4, [x10]",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x10, x10, x20",
|
||||
"add x10, x10, #0x4 (4)",
|
||||
"b #-0x10",
|
||||
"b #+0x18",
|
||||
"cbz x5, #+0x14",
|
||||
"ldr w4, [x10]",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x10, x10, #0x4 (4)",
|
||||
"b #-0x10"
|
||||
]
|
||||
},
|
||||
"rep lodsq": {
|
||||
"ExpectedInstructionCount": 7,
|
||||
"ExpectedInstructionCount": 15,
|
||||
"Comment": "0xad",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #3",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x1c",
|
||||
"cbz x5, #+0x14",
|
||||
"ldr x4, [x10]",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x10, x10, x20",
|
||||
"add x10, x10, #0x8 (8)",
|
||||
"b #-0x10",
|
||||
"b #+0x18",
|
||||
"cbz x5, #+0x14",
|
||||
"ldr x4, [x10]",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x10, x10, #0x8 (8)",
|
||||
"b #-0x10"
|
||||
]
|
||||
},
|
||||
@ -3955,152 +4088,266 @@
|
||||
]
|
||||
},
|
||||
"repz scasb": {
|
||||
"ExpectedInstructionCount": 13,
|
||||
"ExpectedInstructionCount": 29,
|
||||
"Comment": "0xae",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x38",
|
||||
"cbz x5, #+0x30",
|
||||
"ldrb w21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"ldrb w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #24",
|
||||
"cmp w0, w21, lsl #24",
|
||||
"sub w26, w4, w21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"cmp w0, w20, lsl #24",
|
||||
"sub w26, w4, w20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x1 (1)",
|
||||
"b.eq #-0x2c",
|
||||
"b #+0x34",
|
||||
"cbz x5, #+0x30",
|
||||
"ldrb w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #24",
|
||||
"cmp w0, w20, lsl #24",
|
||||
"sub w26, w4, w20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x1 (1)",
|
||||
"b.eq #-0x2c"
|
||||
]
|
||||
},
|
||||
"repz scasw": {
|
||||
"ExpectedInstructionCount": 14,
|
||||
"ExpectedInstructionCount": 29,
|
||||
"Comment": "0xaf",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #1",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x38",
|
||||
"cbz x5, #+0x30",
|
||||
"ldrh w21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"ldrh w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #16",
|
||||
"cmp w0, w21, lsl #16",
|
||||
"sub w26, w4, w21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"cmp w0, w20, lsl #16",
|
||||
"sub w26, w4, w20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x2 (2)",
|
||||
"b.eq #-0x2c",
|
||||
"b #+0x34",
|
||||
"cbz x5, #+0x30",
|
||||
"ldrh w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #16",
|
||||
"cmp w0, w20, lsl #16",
|
||||
"sub w26, w4, w20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x2 (2)",
|
||||
"b.eq #-0x2c"
|
||||
]
|
||||
},
|
||||
"repz scasd": {
|
||||
"ExpectedInstructionCount": 12,
|
||||
"ExpectedInstructionCount": 25,
|
||||
"Comment": "0xaf",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #2",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x30",
|
||||
"cbz x5, #+0x28",
|
||||
"ldr w21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"subs w26, w4, w21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"ldr w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs w26, w4, w20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x4 (4)",
|
||||
"b.eq #-0x24",
|
||||
"b #+0x2c",
|
||||
"cbz x5, #+0x28",
|
||||
"ldr w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs w26, w4, w20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x4 (4)",
|
||||
"b.eq #-0x24"
|
||||
]
|
||||
},
|
||||
"repz scasq": {
|
||||
"ExpectedInstructionCount": 12,
|
||||
"ExpectedInstructionCount": 25,
|
||||
"Comment": "0xaf",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #3",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x30",
|
||||
"cbz x5, #+0x28",
|
||||
"ldr x21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"subs x26, x4, x21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"ldr x20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs x26, x4, x20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x8 (8)",
|
||||
"b.eq #-0x24",
|
||||
"b #+0x2c",
|
||||
"cbz x5, #+0x28",
|
||||
"ldr x20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs x26, x4, x20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x8 (8)",
|
||||
"b.eq #-0x24"
|
||||
]
|
||||
},
|
||||
"repnz scasb": {
|
||||
"ExpectedInstructionCount": 13,
|
||||
"ExpectedInstructionCount": 29,
|
||||
"Comment": "0xae",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x38",
|
||||
"cbz x5, #+0x30",
|
||||
"ldrb w21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"ldrb w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #24",
|
||||
"cmp w0, w21, lsl #24",
|
||||
"sub w26, w4, w21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"cmp w0, w20, lsl #24",
|
||||
"sub w26, w4, w20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x1 (1)",
|
||||
"b.ne #-0x2c",
|
||||
"b #+0x34",
|
||||
"cbz x5, #+0x30",
|
||||
"ldrb w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #24",
|
||||
"cmp w0, w20, lsl #24",
|
||||
"sub w26, w4, w20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x1 (1)",
|
||||
"b.ne #-0x2c"
|
||||
]
|
||||
},
|
||||
"repnz scasw": {
|
||||
"ExpectedInstructionCount": 14,
|
||||
"ExpectedInstructionCount": 29,
|
||||
"Comment": "0xaf",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #1",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x38",
|
||||
"cbz x5, #+0x30",
|
||||
"ldrh w21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"ldrh w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #16",
|
||||
"cmp w0, w21, lsl #16",
|
||||
"sub w26, w4, w21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"cmp w0, w20, lsl #16",
|
||||
"sub w26, w4, w20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x2 (2)",
|
||||
"b.ne #-0x2c",
|
||||
"b #+0x34",
|
||||
"cbz x5, #+0x30",
|
||||
"ldrh w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"lsl w0, w4, #16",
|
||||
"cmp w0, w20, lsl #16",
|
||||
"sub w26, w4, w20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x2 (2)",
|
||||
"b.ne #-0x2c"
|
||||
]
|
||||
},
|
||||
"repnz scasd": {
|
||||
"ExpectedInstructionCount": 12,
|
||||
"ExpectedInstructionCount": 25,
|
||||
"Comment": "0xaf",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #2",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x30",
|
||||
"cbz x5, #+0x28",
|
||||
"ldr w21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"subs w26, w4, w21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"ldr w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs w26, w4, w20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x4 (4)",
|
||||
"b.ne #-0x24",
|
||||
"b #+0x2c",
|
||||
"cbz x5, #+0x28",
|
||||
"ldr w20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs w26, w4, w20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x4 (4)",
|
||||
"b.ne #-0x24"
|
||||
]
|
||||
},
|
||||
"repnz scasq": {
|
||||
"ExpectedInstructionCount": 12,
|
||||
"ExpectedInstructionCount": 25,
|
||||
"Comment": "0xaf",
|
||||
"ExpectedArm64ASM": [
|
||||
"ldrsb x20, [x28, #714]",
|
||||
"lsl x20, x20, #3",
|
||||
"lsr x20, x20, #63",
|
||||
"cbz x20, #+0x8",
|
||||
"b #+0x30",
|
||||
"cbz x5, #+0x28",
|
||||
"ldr x21, [x11]",
|
||||
"eor w27, w4, w21",
|
||||
"subs x26, x4, x21",
|
||||
"mrs x21, nzcv",
|
||||
"eor w21, w21, #0x20000000",
|
||||
"msr nzcv, x21",
|
||||
"ldr x20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs x26, x4, x20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"add x11, x11, x20",
|
||||
"add x11, x11, #0x8 (8)",
|
||||
"b.ne #-0x24",
|
||||
"b #+0x2c",
|
||||
"cbz x5, #+0x28",
|
||||
"ldr x20, [x11]",
|
||||
"eor w27, w4, w20",
|
||||
"subs x26, x4, x20",
|
||||
"mrs x20, nzcv",
|
||||
"eor w20, w20, #0x20000000",
|
||||
"msr nzcv, x20",
|
||||
"sub x5, x5, #0x1 (1)",
|
||||
"sub x11, x11, #0x8 (8)",
|
||||
"b.ne #-0x24"
|
||||
]
|
||||
},
|
||||
|
Loading…
Reference in New Issue
Block a user