Merge pull request #3542 from alyssarosenzweig/ra/rep

Eliminate xblock liveness with rep cmp/lod/scas
This commit is contained in:
Ryan Houdek 2024-04-02 04:24:24 -07:00 committed by GitHub
commit e8abc88702
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 1013 additions and 503 deletions

View File

@ -402,6 +402,24 @@ DEF_OP(CondAddNZCV) {
}
}
DEF_OP(CondSubNZCV) {
  // Backend for the CondSubNZCV IR op: emits a single conditional compare
  // (ccmp). Per the IR description, when the condition holds NZCV is set from
  // the difference Src1 - Src2; otherwise NZCV is forced to the FalseNZCV
  // constant carried by the op.
  const auto Op = IROp->C<IR::IROp_CondSubNZCV>();
  const auto OpSize = IROp->Size;

  // Only 32-bit and 64-bit operations are accepted.
  LOGMAN_THROW_AA_FMT(OpSize == IR::i32Bit || OpSize == IR::i64Bit, "Unsupported {} size: {}", __func__, OpSize);
  const auto EmitSize = (OpSize == IR::i64Bit) ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;

  // Flags to install when the condition does not hold.
  const auto FallbackNZCV = static_cast<ARMEmitter::StatusFlags>(Op->FalseNZCV);

  auto Lhs = GetZeroableReg(Op->Src1);
  const auto Condition = MapSelectCC(Op->Cond);

  // The second operand is either an inlined constant or a register.
  uint64_t Immediate = 0;
  if (!IsInlineConstant(Op->Src2, &Immediate)) {
    ccmp(EmitSize, Lhs, GetReg(Op->Src2.ID()), FallbackNZCV, Condition);
  } else {
    ccmp(EmitSize, Lhs, Immediate, FallbackNZCV, Condition);
  }
}
DEF_OP(Neg) {
auto Op = IROp->C<IR::IROp_Neg>();
const uint8_t OpSize = IROp->Size;

View File

@ -3548,75 +3548,90 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
bool REPE = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX;
// read DF once
auto PtrDir = LoadDir(Size);
auto JumpStart = Jump();
// Make sure to start a new block after ending this one
auto LoopStart = CreateNewCodeBlockAfter(GetCurrentBlock());
SetJumpTarget(JumpStart, LoopStart);
SetCurrentCodeBlock(LoopStart);
StartNewBlock();
// If rcx = 0, skip the whole loop.
OrderedNode *Counter = LoadGPRRegister(X86State::REG_RCX);
auto OuterJump = CondJump(Counter, {COND_EQ});
// Can we end the block?
auto CondJump_ = CondJump(Counter, {COND_EQ});
IRPair<IROp_CondJump> InternalCondJump;
auto LoopTail = CreateNewCodeBlockAfter(LoopStart);
SetFalseJumpTarget(CondJump_, LoopTail);
SetCurrentCodeBlock(LoopTail);
auto BeforeLoop = CreateNewCodeBlockAfter(GetCurrentBlock());
SetFalseJumpTarget(OuterJump, BeforeLoop);
SetCurrentCodeBlock(BeforeLoop);
StartNewBlock();
// Working loop
{
OrderedNode *Dest_RSI = LoadGPRRegister(X86State::REG_RSI);
OrderedNode *Dest_RDI = LoadGPRRegister(X86State::REG_RDI);
ForeachDirection([this, Op, Size, REPE](int PtrDir) {
IRPair<IROp_CondJump> InnerJump;
auto JumpIntoLoop = Jump();
// Only ES prefix
Dest_RDI = AppendSegmentOffset(Dest_RDI, 0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);
// Default DS prefix
Dest_RSI = AppendSegmentOffset(Dest_RSI, Op->Flags, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX);
// Setup for the loop
auto LoopHeader = CreateNewCodeBlockAfter(GetCurrentBlock());
SetCurrentCodeBlock(LoopHeader);
StartNewBlock();
SetJumpTarget(JumpIntoLoop, LoopHeader);
auto Src1 = _LoadMemAutoTSO(GPRClass, Size, Dest_RDI, Size);
auto Src2 = _LoadMem(GPRClass, Size, Dest_RSI, Size);
// Working loop
{
OrderedNode *Dest_RSI = LoadGPRRegister(X86State::REG_RSI);
OrderedNode *Dest_RDI = LoadGPRRegister(X86State::REG_RDI);
GenerateFlags_SUB(Op, Src2, Src1);
// Only ES prefix
Dest_RDI = AppendSegmentOffset(Dest_RDI, 0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);
// Default DS prefix
Dest_RSI = AppendSegmentOffset(Dest_RSI, Op->Flags, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX);
// Calculate flags early.
CalculateDeferredFlags();
auto Src1 = _LoadMemAutoTSO(GPRClass, Size, Dest_RDI, Size);
auto Src2 = _LoadMem(GPRClass, Size, Dest_RSI, Size);
OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX);
// We'll calculate PF/AF after the loop, so use them as temporaries here.
_StoreRegister(Src1, false, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
_StoreRegister(Src2, false, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
// Decrement counter
TailCounter = _Sub(OpSize::i64Bit, TailCounter, _Constant(1));
OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX);
// Store the counter since we don't have phis
StoreGPRRegister(X86State::REG_RCX, TailCounter);
// Decrement counter
TailCounter = _SubWithFlags(OpSize::i64Bit, TailCounter, _Constant(1));
// Offset the pointer
Dest_RDI = _Add(OpSize::i64Bit, Dest_RDI, PtrDir);
StoreGPRRegister(X86State::REG_RDI, Dest_RDI);
// Store the counter since we don't have phis
StoreGPRRegister(X86State::REG_RCX, TailCounter);
// Offset second pointer
Dest_RSI = _Add(OpSize::i64Bit, Dest_RSI, PtrDir);
StoreGPRRegister(X86State::REG_RSI, Dest_RSI);
// Offset the pointer
Dest_RDI = _Add(OpSize::i64Bit, Dest_RDI, _Constant(PtrDir * Size));
StoreGPRRegister(X86State::REG_RDI, Dest_RDI);
CalculateDeferredFlags();
InternalCondJump = CondJumpNZCV({REPE ? COND_EQ : COND_NEQ});
// Offset second pointer
Dest_RSI = _Add(OpSize::i64Bit, Dest_RSI, _Constant(PtrDir * Size));
StoreGPRRegister(X86State::REG_RSI, Dest_RSI);
// Jump back to the start if we have more work to do
SetTrueJumpTarget(InternalCondJump, LoopStart);
}
// If TailCounter != 0, compare sources.
// If TailCounter == 0, set ZF iff that would break.
_CondSubNZCV(OpSize::i64Bit, Src2, Src1, {COND_NEQ}, REPE ? 0 : (1 << 2) /* Z */);
CachedNZCV = nullptr;
NZCVDirty = false;
InnerJump = CondJumpNZCV({REPE ? COND_EQ : COND_NEQ});
// Jump back to the start if we have more work to do
SetTrueJumpTarget(InnerJump, LoopHeader);
}
// Make sure to start a new block after ending this one
auto LoopEnd = CreateNewCodeBlockAfter(GetCurrentBlock());
SetFalseJumpTarget(InnerJump, LoopEnd);
SetCurrentCodeBlock(LoopEnd);
StartNewBlock();
});
// Make sure to start a new block after ending this one
auto LoopEnd = CreateNewCodeBlockAfter(LoopTail);
SetTrueJumpTarget(CondJump_, LoopEnd);
{
// Grab the sources from the last iteration so we can set flags.
auto Src1 = _LoadRegister(false, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
auto Src2 = _LoadRegister(false, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
GenerateFlags_SUB(Op, Src2, Src1);
CalculateDeferredFlags();
}
auto Jump_ = Jump();
SetFalseJumpTarget(InternalCondJump, LoopEnd);
SetCurrentCodeBlock(LoopEnd);
auto Exit = CreateNewCodeBlockAfter(GetCurrentBlock());
SetJumpTarget(Jump_, Exit);
SetTrueJumpTarget(OuterJump, Exit);
SetCurrentCodeBlock(Exit);
StartNewBlock();
}
}
@ -3647,65 +3662,64 @@ void OpDispatchBuilder::LODSOp(OpcodeArgs) {
// Calculate flags early. because end of block
CalculateDeferredFlags();
// XXX: Theoretically LODS could be optimized to
// RSI += {-}(RCX * Size)
// RAX = [RSI - Size]
// But this might violate the case of an application scanning pages for read permission and catching the fault
// May or may not matter
ForeachDirection([this, Op, Size](int PtrDir) {
// XXX: Theoretically LODS could be optimized to
// RSI += {-}(RCX * Size)
// RAX = [RSI - Size]
// But this might violate the case of an application scanning pages for read permission and catching the fault
// May or may not matter
// Read DF once
auto PtrDir = LoadDir(Size);
auto JumpStart = Jump();
// Make sure to start a new block after ending this one
auto LoopStart = CreateNewCodeBlockAfter(GetCurrentBlock());
SetJumpTarget(JumpStart, LoopStart);
SetCurrentCodeBlock(LoopStart);
StartNewBlock();
auto JumpStart = Jump();
// Make sure to start a new block after ending this one
auto LoopStart = CreateNewCodeBlockAfter(GetCurrentBlock());
SetJumpTarget(JumpStart, LoopStart);
SetCurrentCodeBlock(LoopStart);
StartNewBlock();
OrderedNode *Counter = LoadGPRRegister(X86State::REG_RCX);
OrderedNode *Counter = LoadGPRRegister(X86State::REG_RCX);
// Can we end the block?
// Can we end the block?
// We leave if RCX = 0
auto CondJump_ = CondJump(Counter, {COND_EQ});
// We leave if RCX = 0
auto CondJump_ = CondJump(Counter, {COND_EQ});
auto LoopTail = CreateNewCodeBlockAfter(LoopStart);
SetFalseJumpTarget(CondJump_, LoopTail);
SetCurrentCodeBlock(LoopTail);
StartNewBlock();
auto LoopTail = CreateNewCodeBlockAfter(LoopStart);
SetFalseJumpTarget(CondJump_, LoopTail);
SetCurrentCodeBlock(LoopTail);
StartNewBlock();
// Working loop
{
OrderedNode *Dest_RSI = LoadGPRRegister(X86State::REG_RSI);
// Working loop
{
OrderedNode *Dest_RSI = LoadGPRRegister(X86State::REG_RSI);
Dest_RSI = AppendSegmentOffset(Dest_RSI, Op->Flags, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX);
Dest_RSI = AppendSegmentOffset(Dest_RSI, Op->Flags, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX);
auto Src = _LoadMemAutoTSO(GPRClass, Size, Dest_RSI, Size);
auto Src = _LoadMemAutoTSO(GPRClass, Size, Dest_RSI, Size);
StoreResult(GPRClass, Op, Src, -1);
StoreResult(GPRClass, Op, Src, -1);
OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX);
OrderedNode *TailDest_RSI = LoadGPRRegister(X86State::REG_RSI);
OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX);
OrderedNode *TailDest_RSI = LoadGPRRegister(X86State::REG_RSI);
// Decrement counter
TailCounter = _Sub(OpSize::i64Bit, TailCounter, _Constant(1));
// Decrement counter
TailCounter = _Sub(OpSize::i64Bit, TailCounter, _Constant(1));
// Store the counter since we don't have phis
StoreGPRRegister(X86State::REG_RCX, TailCounter);
// Store the counter since we don't have phis
StoreGPRRegister(X86State::REG_RCX, TailCounter);
// Offset the pointer
TailDest_RSI = _Add(OpSize::i64Bit, TailDest_RSI, _Constant(PtrDir * Size));
StoreGPRRegister(X86State::REG_RSI, TailDest_RSI);
// Offset the pointer
TailDest_RSI = _Add(OpSize::i64Bit, TailDest_RSI, PtrDir);
StoreGPRRegister(X86State::REG_RSI, TailDest_RSI);
// Jump back to the start, we have more work to do
Jump(LoopStart);
}
// Make sure to start a new block after ending this one
auto LoopEnd = CreateNewCodeBlockAfter(LoopTail);
SetTrueJumpTarget(CondJump_, LoopEnd);
SetCurrentCodeBlock(LoopEnd);
StartNewBlock();
// Jump back to the start, we have more work to do
Jump(LoopStart);
}
// Make sure to start a new block after ending this one
auto LoopEnd = CreateNewCodeBlockAfter(LoopTail);
SetTrueJumpTarget(CondJump_, LoopEnd);
SetCurrentCodeBlock(LoopEnd);
StartNewBlock();
});
}
}
@ -3736,71 +3750,70 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) {
// Calculate flags early. because end of block
CalculateDeferredFlags();
bool REPE = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX;
ForeachDirection([this, Op, Size](int Dir){
bool REPE = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX;
// read DF once
auto PtrDir = LoadDir(Size);
auto JumpStart = Jump();
// Make sure to start a new block after ending this one
auto LoopStart = CreateNewCodeBlockAfter(GetCurrentBlock());
SetJumpTarget(JumpStart, LoopStart);
SetCurrentCodeBlock(LoopStart);
StartNewBlock();
auto JumpStart = Jump();
// Make sure to start a new block after ending this one
auto LoopStart = CreateNewCodeBlockAfter(GetCurrentBlock());
SetJumpTarget(JumpStart, LoopStart);
SetCurrentCodeBlock(LoopStart);
StartNewBlock();
OrderedNode *Counter = LoadGPRRegister(X86State::REG_RCX);
OrderedNode *Counter = LoadGPRRegister(X86State::REG_RCX);
// Can we end the block?
// We leave if RCX = 0
auto CondJump_ = CondJump(Counter, {COND_EQ});
IRPair<IROp_CondJump> InternalCondJump;
// Can we end the block?
// We leave if RCX = 0
auto CondJump_ = CondJump(Counter, {COND_EQ});
IRPair<IROp_CondJump> InternalCondJump;
auto LoopTail = CreateNewCodeBlockAfter(LoopStart);
SetFalseJumpTarget(CondJump_, LoopTail);
SetCurrentCodeBlock(LoopTail);
StartNewBlock();
auto LoopTail = CreateNewCodeBlockAfter(LoopStart);
SetFalseJumpTarget(CondJump_, LoopTail);
SetCurrentCodeBlock(LoopTail);
StartNewBlock();
// Working loop
{
OrderedNode *Dest_RDI = LoadGPRRegister(X86State::REG_RDI);
// Working loop
{
OrderedNode *Dest_RDI = LoadGPRRegister(X86State::REG_RDI);
Dest_RDI = AppendSegmentOffset(Dest_RDI, 0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);
Dest_RDI = AppendSegmentOffset(Dest_RDI, 0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);
auto Src1 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
auto Src2 = _LoadMemAutoTSO(GPRClass, Size, Dest_RDI, Size);
auto Src1 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
auto Src2 = _LoadMemAutoTSO(GPRClass, Size, Dest_RDI, Size);
GenerateFlags_SUB(Op, Src1, Src2);
GenerateFlags_SUB(Op, Src1, Src2);
// Calculate flags early.
CalculateDeferredFlags();
// Calculate flags early.
CalculateDeferredFlags();
OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX);
OrderedNode *TailDest_RDI = LoadGPRRegister(X86State::REG_RDI);
OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX);
OrderedNode *TailDest_RDI = LoadGPRRegister(X86State::REG_RDI);
// Decrement counter
TailCounter = _Sub(OpSize::i64Bit, TailCounter, _Constant(1));
// Decrement counter
TailCounter = _Sub(OpSize::i64Bit, TailCounter, _Constant(1));
// Store the counter since we don't have phis
StoreGPRRegister(X86State::REG_RCX, TailCounter);
// Store the counter since we don't have phis
StoreGPRRegister(X86State::REG_RCX, TailCounter);
// Offset the pointer
TailDest_RDI = _Add(OpSize::i64Bit, TailDest_RDI, _Constant(Dir * Size));
StoreGPRRegister(X86State::REG_RDI, TailDest_RDI);
// Offset the pointer
TailDest_RDI = _Add(OpSize::i64Bit, TailDest_RDI, PtrDir);
StoreGPRRegister(X86State::REG_RDI, TailDest_RDI);
CalculateDeferredFlags();
InternalCondJump = CondJumpNZCV({REPE ? COND_EQ : COND_NEQ});
CalculateDeferredFlags();
InternalCondJump = CondJumpNZCV({REPE ? COND_EQ : COND_NEQ});
// Jump back to the start if we have more work to do
SetTrueJumpTarget(InternalCondJump, LoopStart);
}
// Make sure to start a new block after ending this one
auto LoopEnd = CreateNewCodeBlockAfter(LoopTail);
SetTrueJumpTarget(CondJump_, LoopEnd);
// Jump back to the start if we have more work to do
SetTrueJumpTarget(InternalCondJump, LoopStart);
}
// Make sure to start a new block after ending this one
auto LoopEnd = CreateNewCodeBlockAfter(LoopTail);
SetTrueJumpTarget(CondJump_, LoopEnd);
SetFalseJumpTarget(InternalCondJump, LoopEnd);
SetFalseJumpTarget(InternalCondJump, LoopEnd);
SetCurrentCodeBlock(LoopEnd);
StartNewBlock();
SetCurrentCodeBlock(LoopEnd);
StartNewBlock();
});
}
}

View File

@ -223,6 +223,32 @@ public:
return CanHaveSideEffects;
}
/**
 * Emits `Routine` twice, once per string direction, behind a branch on the
 * x86 direction flag (DF).
 *
 * Three new code blocks are created after the current one: a forward block,
 * a backward block and a common exit block. A conditional jump comparing the
 * raw DF value against zero selects between the forward and backward blocks.
 * `Routine` is invoked with +1 while emitting the forward block and with -1
 * while emitting the backward block, so the callee sees the direction as a
 * compile-time-known integer. Both variants end with a jump to the exit
 * block, which is left as the current block on return.
 *
 * @param Routine Callable taking a single int (+1 or -1). It is called
 *                exactly twice, forward variant first.
 */
template <typename F>
void ForeachDirection(F&& Routine) {
  // Constant that the raw direction flag is compared against.
  auto ZeroConst = _Constant(0);

  // Lay the blocks out in order: forward, backward, exit.
  auto FwdBlock = CreateNewCodeBlockAfter(GetCurrentBlock());
  auto BwdBlock = CreateNewCodeBlockAfter(FwdBlock);
  auto DoneBlock = CreateNewCodeBlockAfter(BwdBlock);

  // Dispatch on DF: equal to zero goes forward, anything else backward.
  auto DirFlag = GetRFLAG(X86State::RFLAG_DF_RAW_LOC);
  CondJump(DirFlag, ZeroConst, FwdBlock, BwdBlock, {COND_EQ});

  // Emits one specialised copy of the routine into the given block and
  // rejoins the common exit.
  const auto EmitVariant = [&](auto Block, int Sign) {
    SetCurrentCodeBlock(Block);
    StartNewBlock();
    Routine(Sign);
    Jump(DoneBlock);
  };

  EmitVariant(FwdBlock, 1);
  EmitVariant(BwdBlock, -1);

  // Subsequent IR goes into the shared exit block.
  SetCurrentCodeBlock(DoneBlock);
  StartNewBlock();
}
OpDispatchBuilder(FEXCore::Context::ContextImpl *ctx);
OpDispatchBuilder(FEXCore::Utils::IntrusivePooledAllocator &Allocator);

View File

@ -1035,6 +1035,14 @@
"Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
]
},
"CondSubNZCV OpSize:#Size, GPR:$Src1, GPR:$Src2, CondClass:$Cond, u8:$FalseNZCV": {
"Desc": ["If condition is true, set NZCV per difference of GPRs, else force NZCV to a constant."],
"HasSideEffects": true,
"DestSize": "Size",
"EmitValidation": [
"Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
]
},
"GPR = AdcWithFlags OpSize:#Size, GPR:$Src1, GPR:$Src2": {
"Desc": ["Adds and set NZCV for the sum of two GPRs and carry-in given as NZCV"],
"HasSideEffects": true,

View File

@ -1164,6 +1164,7 @@ bool ConstProp::ConstantInlining(IREmitter *IREmit, const IRListView& CurrentIR)
break;
}
case OP_CONDADDNZCV:
case OP_CONDSUBNZCV:
{
auto Op = IROp->C<IR::IROp_CondAddNZCV>();

View File

@ -227,6 +227,7 @@ DeadFlagCalculationEliminination::Classify(IROp_Header *IROp)
return {.Read = FlagsForCondClassType(Op->Cond)};
}
case OP_CONDSUBNZCV:
case OP_CONDADDNZCV: {
auto Op = IROp->CW<IR::IROp_CondAddNZCV>();
return {

View File

@ -1950,153 +1950,251 @@
]
},
"repz cmpsb": {
"ExpectedInstructionCount": 13,
"ExpectedInstructionCount": 26,
"Comment": "0xa6",
"ExpectedArm64ASM": [
"cbz x5, #+0x68",
"ldrsb x20, [x28, #714]",
"cbz x5, #+0x30",
"ldrb w21, [x11]",
"ldrb w22, [x10]",
"eor w27, w22, w21",
"lsl w0, w22, #24",
"cmp w0, w21, lsl #24",
"sub w26, w22, w21",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.eq #-0x2c"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldrb w26, [x11]",
"ldrb w27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x1 (1)",
"add x10, x10, #0x1 (1)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"b #+0x20",
"ldrb w26, [x11]",
"ldrb w27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x1 (1)",
"sub x10, x10, #0x1 (1)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"lsl w0, w20, #24",
"cmp w0, w26, lsl #24",
"sub w26, w20, w26",
"cfinv"
]
},
"repz cmpsw": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 26,
"Comment": "0xa7",
"ExpectedArm64ASM": [
"cbz x5, #+0x68",
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #1",
"cbz x5, #+0x30",
"ldrh w21, [x11]",
"ldrh w22, [x10]",
"eor w27, w22, w21",
"lsl w0, w22, #16",
"cmp w0, w21, lsl #16",
"sub w26, w22, w21",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.eq #-0x2c"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldrh w26, [x11]",
"ldrh w27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x2 (2)",
"add x10, x10, #0x2 (2)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"b #+0x20",
"ldrh w26, [x11]",
"ldrh w27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x2 (2)",
"sub x10, x10, #0x2 (2)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"lsl w0, w20, #16",
"cmp w0, w26, lsl #16",
"sub w26, w20, w26",
"cfinv"
]
},
"repz cmpsd": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 24,
"Comment": "0xa7",
"ExpectedArm64ASM": [
"cbz x5, #+0x60",
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #2",
"cbz x5, #+0x28",
"ldr w21, [x11]",
"ldr w22, [x10]",
"eor w27, w22, w21",
"subs w26, w22, w21",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.eq #-0x24"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldr w26, [x11]",
"ldr w27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x4 (4)",
"add x10, x10, #0x4 (4)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"b #+0x20",
"ldr w26, [x11]",
"ldr w27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x4 (4)",
"sub x10, x10, #0x4 (4)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"subs w26, w20, w26",
"cfinv"
]
},
"repz cmpsq": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 24,
"Comment": "0xa7",
"ExpectedArm64ASM": [
"cbz x5, #+0x60",
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #3",
"cbz x5, #+0x28",
"ldr x21, [x11]",
"ldr x22, [x10]",
"eor w27, w22, w21",
"subs x26, x22, x21",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.eq #-0x24"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldr x26, [x11]",
"ldr x27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x8 (8)",
"add x10, x10, #0x8 (8)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"b #+0x20",
"ldr x26, [x11]",
"ldr x27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x8 (8)",
"sub x10, x10, #0x8 (8)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"subs x26, x20, x26",
"cfinv"
]
},
"repnz cmpsb": {
"ExpectedInstructionCount": 13,
"ExpectedInstructionCount": 26,
"Comment": "0xa6",
"ExpectedArm64ASM": [
"cbz x5, #+0x68",
"ldrsb x20, [x28, #714]",
"cbz x5, #+0x30",
"ldrb w21, [x11]",
"ldrb w22, [x10]",
"eor w27, w22, w21",
"lsl w0, w22, #24",
"cmp w0, w21, lsl #24",
"sub w26, w22, w21",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.ne #-0x2c"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldrb w26, [x11]",
"ldrb w27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x1 (1)",
"add x10, x10, #0x1 (1)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"b #+0x20",
"ldrb w26, [x11]",
"ldrb w27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x1 (1)",
"sub x10, x10, #0x1 (1)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"lsl w0, w20, #24",
"cmp w0, w26, lsl #24",
"sub w26, w20, w26",
"cfinv"
]
},
"repnz cmpsw": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 26,
"Comment": "0xa7",
"ExpectedArm64ASM": [
"cbz x5, #+0x68",
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #1",
"cbz x5, #+0x30",
"ldrh w21, [x11]",
"ldrh w22, [x10]",
"eor w27, w22, w21",
"lsl w0, w22, #16",
"cmp w0, w21, lsl #16",
"sub w26, w22, w21",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.ne #-0x2c"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldrh w26, [x11]",
"ldrh w27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x2 (2)",
"add x10, x10, #0x2 (2)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"b #+0x20",
"ldrh w26, [x11]",
"ldrh w27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x2 (2)",
"sub x10, x10, #0x2 (2)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"lsl w0, w20, #16",
"cmp w0, w26, lsl #16",
"sub w26, w20, w26",
"cfinv"
]
},
"repnz cmpsd": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 24,
"Comment": "0xa7",
"ExpectedArm64ASM": [
"cbz x5, #+0x60",
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #2",
"cbz x5, #+0x28",
"ldr w21, [x11]",
"ldr w22, [x10]",
"eor w27, w22, w21",
"subs w26, w22, w21",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.ne #-0x24"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldr w26, [x11]",
"ldr w27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x4 (4)",
"add x10, x10, #0x4 (4)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"b #+0x20",
"ldr w26, [x11]",
"ldr w27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x4 (4)",
"sub x10, x10, #0x4 (4)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"subs w26, w20, w26",
"cfinv"
]
},
"repnz cmpsq": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 24,
"Comment": "0xa7",
"ExpectedArm64ASM": [
"cbz x5, #+0x60",
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #3",
"cbz x5, #+0x28",
"ldr x21, [x11]",
"ldr x22, [x10]",
"eor w27, w22, w21",
"subs x26, x22, x21",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.ne #-0x24"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldr x26, [x11]",
"ldr x27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x8 (8)",
"add x10, x10, #0x8 (8)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"b #+0x20",
"ldr x26, [x11]",
"ldr x27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x8 (8)",
"sub x10, x10, #0x8 (8)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"subs x26, x20, x26",
"cfinv"
]
},
"test al, 1": {
@ -2212,136 +2310,234 @@
]
},
"repz scasb": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 25,
"Comment": "0xae",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x30",
"cbz x5, #+0x28",
"ldrb w21, [x11]",
"eor w27, w4, w21",
"ldrb w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #24",
"cmp w0, w21, lsl #24",
"sub w26, w4, w21",
"cmp w0, w20, lsl #24",
"sub w26, w4, w20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x1 (1)",
"b.eq #-0x24",
"b #+0x2c",
"cbz x5, #+0x28",
"ldrb w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #24",
"cmp w0, w20, lsl #24",
"sub w26, w4, w20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x1 (1)",
"b.eq #-0x24"
]
},
"repz scasw": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 25,
"Comment": "0xaf",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #1",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x30",
"cbz x5, #+0x28",
"ldrh w21, [x11]",
"eor w27, w4, w21",
"ldrh w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #16",
"cmp w0, w21, lsl #16",
"sub w26, w4, w21",
"cmp w0, w20, lsl #16",
"sub w26, w4, w20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x2 (2)",
"b.eq #-0x24",
"b #+0x2c",
"cbz x5, #+0x28",
"ldrh w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #16",
"cmp w0, w20, lsl #16",
"sub w26, w4, w20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x2 (2)",
"b.eq #-0x24"
]
},
"repz scasd": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 21,
"Comment": "0xaf",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #2",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x28",
"cbz x5, #+0x20",
"ldr w21, [x11]",
"eor w27, w4, w21",
"subs w26, w4, w21",
"ldr w20, [x11]",
"eor w27, w4, w20",
"subs w26, w4, w20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x4 (4)",
"b.eq #-0x1c",
"b #+0x24",
"cbz x5, #+0x20",
"ldr w20, [x11]",
"eor w27, w4, w20",
"subs w26, w4, w20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x4 (4)",
"b.eq #-0x1c"
]
},
"repz scasq": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 21,
"Comment": "0xaf",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #3",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x28",
"cbz x5, #+0x20",
"ldr x21, [x11]",
"eor w27, w4, w21",
"subs x26, x4, x21",
"ldr x20, [x11]",
"eor w27, w4, w20",
"subs x26, x4, x20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x8 (8)",
"b.eq #-0x1c",
"b #+0x24",
"cbz x5, #+0x20",
"ldr x20, [x11]",
"eor w27, w4, w20",
"subs x26, x4, x20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x8 (8)",
"b.eq #-0x1c"
]
},
"repnz scasb": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 25,
"Comment": "0xae",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x30",
"cbz x5, #+0x28",
"ldrb w21, [x11]",
"eor w27, w4, w21",
"ldrb w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #24",
"cmp w0, w21, lsl #24",
"sub w26, w4, w21",
"cmp w0, w20, lsl #24",
"sub w26, w4, w20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x1 (1)",
"b.ne #-0x24",
"b #+0x2c",
"cbz x5, #+0x28",
"ldrb w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #24",
"cmp w0, w20, lsl #24",
"sub w26, w4, w20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x1 (1)",
"b.ne #-0x24"
]
},
"repnz scasw": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 25,
"Comment": "0xaf",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #1",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x30",
"cbz x5, #+0x28",
"ldrh w21, [x11]",
"eor w27, w4, w21",
"ldrh w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #16",
"cmp w0, w21, lsl #16",
"sub w26, w4, w21",
"cmp w0, w20, lsl #16",
"sub w26, w4, w20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x2 (2)",
"b.ne #-0x24",
"b #+0x2c",
"cbz x5, #+0x28",
"ldrh w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #16",
"cmp w0, w20, lsl #16",
"sub w26, w4, w20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x2 (2)",
"b.ne #-0x24"
]
},
"repnz scasd": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 21,
"Comment": "0xaf",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #2",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x28",
"cbz x5, #+0x20",
"ldr w21, [x11]",
"eor w27, w4, w21",
"subs w26, w4, w21",
"ldr w20, [x11]",
"eor w27, w4, w20",
"subs w26, w4, w20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x4 (4)",
"b.ne #-0x1c",
"b #+0x24",
"cbz x5, #+0x20",
"ldr w20, [x11]",
"eor w27, w4, w20",
"subs w26, w4, w20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x4 (4)",
"b.ne #-0x1c"
]
},
"repnz scasq": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 21,
"Comment": "0xaf",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #3",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x28",
"cbz x5, #+0x20",
"ldr x21, [x11]",
"eor w27, w4, w21",
"subs x26, x4, x21",
"ldr x20, [x11]",
"eor w27, w4, w20",
"subs x26, x4, x20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x8 (8)",
"b.ne #-0x1c",
"b #+0x24",
"cbz x5, #+0x20",
"ldr x20, [x11]",
"eor w27, w4, w20",
"subs x26, x4, x20",
"cfinv",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x8 (8)",
"b.ne #-0x1c"
]
},

View File

@ -3295,169 +3295,267 @@
]
},
"repz cmpsb": {
"ExpectedInstructionCount": 15,
"ExpectedInstructionCount": 28,
"Comment": "0xa6",
"ExpectedArm64ASM": [
"cbz x5, #+0x70",
"ldrsb x20, [x28, #714]",
"cbz x5, #+0x38",
"ldrb w21, [x11]",
"ldrb w22, [x10]",
"eor w27, w22, w21",
"lsl w0, w22, #24",
"cmp w0, w21, lsl #24",
"sub w26, w22, w21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.eq #-0x34"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldrb w26, [x11]",
"ldrb w27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x1 (1)",
"add x10, x10, #0x1 (1)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"b #+0x20",
"ldrb w26, [x11]",
"ldrb w27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x1 (1)",
"sub x10, x10, #0x1 (1)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"lsl w0, w20, #24",
"cmp w0, w26, lsl #24",
"sub w26, w20, w26",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20"
]
},
"repz cmpsw": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 28,
"Comment": "0xa7",
"ExpectedArm64ASM": [
"cbz x5, #+0x70",
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #1",
"cbz x5, #+0x38",
"ldrh w21, [x11]",
"ldrh w22, [x10]",
"eor w27, w22, w21",
"lsl w0, w22, #16",
"cmp w0, w21, lsl #16",
"sub w26, w22, w21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.eq #-0x34"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldrh w26, [x11]",
"ldrh w27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x2 (2)",
"add x10, x10, #0x2 (2)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"b #+0x20",
"ldrh w26, [x11]",
"ldrh w27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x2 (2)",
"sub x10, x10, #0x2 (2)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"lsl w0, w20, #16",
"cmp w0, w26, lsl #16",
"sub w26, w20, w26",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20"
]
},
"repz cmpsd": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 26,
"Comment": "0xa7",
"ExpectedArm64ASM": [
"cbz x5, #+0x68",
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #2",
"cbz x5, #+0x30",
"ldr w21, [x11]",
"ldr w22, [x10]",
"eor w27, w22, w21",
"subs w26, w22, w21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.eq #-0x2c"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldr w26, [x11]",
"ldr w27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x4 (4)",
"add x10, x10, #0x4 (4)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"b #+0x20",
"ldr w26, [x11]",
"ldr w27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x4 (4)",
"sub x10, x10, #0x4 (4)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"subs w26, w20, w26",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20"
]
},
"repz cmpsq": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 26,
"Comment": "0xa7",
"ExpectedArm64ASM": [
"cbz x5, #+0x68",
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #3",
"cbz x5, #+0x30",
"ldr x21, [x11]",
"ldr x22, [x10]",
"eor w27, w22, w21",
"subs x26, x22, x21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.eq #-0x2c"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldr x26, [x11]",
"ldr x27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x8 (8)",
"add x10, x10, #0x8 (8)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"b #+0x20",
"ldr x26, [x11]",
"ldr x27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x8 (8)",
"sub x10, x10, #0x8 (8)",
"ccmp x27, x26, #nzcv, ne",
"b.eq #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"subs x26, x20, x26",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20"
]
},
"repnz cmpsb": {
"ExpectedInstructionCount": 15,
"ExpectedInstructionCount": 28,
"Comment": "0xa6",
"ExpectedArm64ASM": [
"cbz x5, #+0x70",
"ldrsb x20, [x28, #714]",
"cbz x5, #+0x38",
"ldrb w21, [x11]",
"ldrb w22, [x10]",
"eor w27, w22, w21",
"lsl w0, w22, #24",
"cmp w0, w21, lsl #24",
"sub w26, w22, w21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.ne #-0x34"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldrb w26, [x11]",
"ldrb w27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x1 (1)",
"add x10, x10, #0x1 (1)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"b #+0x20",
"ldrb w26, [x11]",
"ldrb w27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x1 (1)",
"sub x10, x10, #0x1 (1)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"lsl w0, w20, #24",
"cmp w0, w26, lsl #24",
"sub w26, w20, w26",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20"
]
},
"repnz cmpsw": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 28,
"Comment": "0xa7",
"ExpectedArm64ASM": [
"cbz x5, #+0x70",
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #1",
"cbz x5, #+0x38",
"ldrh w21, [x11]",
"ldrh w22, [x10]",
"eor w27, w22, w21",
"lsl w0, w22, #16",
"cmp w0, w21, lsl #16",
"sub w26, w22, w21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.ne #-0x34"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldrh w26, [x11]",
"ldrh w27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x2 (2)",
"add x10, x10, #0x2 (2)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"b #+0x20",
"ldrh w26, [x11]",
"ldrh w27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x2 (2)",
"sub x10, x10, #0x2 (2)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"lsl w0, w20, #16",
"cmp w0, w26, lsl #16",
"sub w26, w20, w26",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20"
]
},
"repnz cmpsd": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 26,
"Comment": "0xa7",
"ExpectedArm64ASM": [
"cbz x5, #+0x68",
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #2",
"cbz x5, #+0x30",
"ldr w21, [x11]",
"ldr w22, [x10]",
"eor w27, w22, w21",
"subs w26, w22, w21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.ne #-0x2c"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldr w26, [x11]",
"ldr w27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x4 (4)",
"add x10, x10, #0x4 (4)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"b #+0x20",
"ldr w26, [x11]",
"ldr w27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x4 (4)",
"sub x10, x10, #0x4 (4)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"subs w26, w20, w26",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20"
]
},
"repnz cmpsq": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 26,
"Comment": "0xa7",
"ExpectedArm64ASM": [
"cbz x5, #+0x68",
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #3",
"cbz x5, #+0x30",
"ldr x21, [x11]",
"ldr x22, [x10]",
"eor w27, w22, w21",
"subs x26, x22, x21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x10, x10, x20",
"b.ne #-0x2c"
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x24",
"ldr x26, [x11]",
"ldr x27, [x10]",
"subs x5, x5, #0x1 (1)",
"add x11, x11, #0x8 (8)",
"add x10, x10, #0x8 (8)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"b #+0x20",
"ldr x26, [x11]",
"ldr x27, [x10]",
"subs x5, x5, #0x1 (1)",
"sub x11, x11, #0x8 (8)",
"sub x10, x10, #0x8 (8)",
"ccmp x27, x26, #nZcv, ne",
"b.ne #-0x18",
"mov x20, x27",
"eor w27, w20, w26",
"subs x26, x20, x26",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20"
]
},
"test al, 1": {
@ -3842,55 +3940,90 @@
]
},
"rep lodsb": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 17,
"Comment": "0xac",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x20",
"cbz x5, #+0x18",
"ldrb w21, [x10]",
"bfxil x4, x21, #0, #8",
"ldrb w20, [x10]",
"bfxil x4, x20, #0, #8",
"sub x5, x5, #0x1 (1)",
"add x10, x10, x20",
"add x10, x10, #0x1 (1)",
"b #-0x14",
"b #+0x1c",
"cbz x5, #+0x18",
"ldrb w20, [x10]",
"bfxil x4, x20, #0, #8",
"sub x5, x5, #0x1 (1)",
"sub x10, x10, #0x1 (1)",
"b #-0x14"
]
},
"rep lodsw": {
"ExpectedInstructionCount": 8,
"ExpectedInstructionCount": 17,
"Comment": "0xad",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #1",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x20",
"cbz x5, #+0x18",
"ldrh w21, [x10]",
"bfxil x4, x21, #0, #16",
"ldrh w20, [x10]",
"bfxil x4, x20, #0, #16",
"sub x5, x5, #0x1 (1)",
"add x10, x10, x20",
"add x10, x10, #0x2 (2)",
"b #-0x14",
"b #+0x1c",
"cbz x5, #+0x18",
"ldrh w20, [x10]",
"bfxil x4, x20, #0, #16",
"sub x5, x5, #0x1 (1)",
"sub x10, x10, #0x2 (2)",
"b #-0x14"
]
},
"rep lodsd": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 15,
"Comment": "0xad",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #2",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x1c",
"cbz x5, #+0x14",
"ldr w4, [x10]",
"sub x5, x5, #0x1 (1)",
"add x10, x10, x20",
"add x10, x10, #0x4 (4)",
"b #-0x10",
"b #+0x18",
"cbz x5, #+0x14",
"ldr w4, [x10]",
"sub x5, x5, #0x1 (1)",
"sub x10, x10, #0x4 (4)",
"b #-0x10"
]
},
"rep lodsq": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 15,
"Comment": "0xad",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #3",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x1c",
"cbz x5, #+0x14",
"ldr x4, [x10]",
"sub x5, x5, #0x1 (1)",
"add x10, x10, x20",
"add x10, x10, #0x8 (8)",
"b #-0x10",
"b #+0x18",
"cbz x5, #+0x14",
"ldr x4, [x10]",
"sub x5, x5, #0x1 (1)",
"sub x10, x10, #0x8 (8)",
"b #-0x10"
]
},
@ -3955,152 +4088,266 @@
]
},
"repz scasb": {
"ExpectedInstructionCount": 13,
"ExpectedInstructionCount": 29,
"Comment": "0xae",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x38",
"cbz x5, #+0x30",
"ldrb w21, [x11]",
"eor w27, w4, w21",
"ldrb w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #24",
"cmp w0, w21, lsl #24",
"sub w26, w4, w21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"cmp w0, w20, lsl #24",
"sub w26, w4, w20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x1 (1)",
"b.eq #-0x2c",
"b #+0x34",
"cbz x5, #+0x30",
"ldrb w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #24",
"cmp w0, w20, lsl #24",
"sub w26, w4, w20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x1 (1)",
"b.eq #-0x2c"
]
},
"repz scasw": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 29,
"Comment": "0xaf",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #1",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x38",
"cbz x5, #+0x30",
"ldrh w21, [x11]",
"eor w27, w4, w21",
"ldrh w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #16",
"cmp w0, w21, lsl #16",
"sub w26, w4, w21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"cmp w0, w20, lsl #16",
"sub w26, w4, w20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x2 (2)",
"b.eq #-0x2c",
"b #+0x34",
"cbz x5, #+0x30",
"ldrh w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #16",
"cmp w0, w20, lsl #16",
"sub w26, w4, w20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x2 (2)",
"b.eq #-0x2c"
]
},
"repz scasd": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 25,
"Comment": "0xaf",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #2",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x30",
"cbz x5, #+0x28",
"ldr w21, [x11]",
"eor w27, w4, w21",
"subs w26, w4, w21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"ldr w20, [x11]",
"eor w27, w4, w20",
"subs w26, w4, w20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x4 (4)",
"b.eq #-0x24",
"b #+0x2c",
"cbz x5, #+0x28",
"ldr w20, [x11]",
"eor w27, w4, w20",
"subs w26, w4, w20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x4 (4)",
"b.eq #-0x24"
]
},
"repz scasq": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 25,
"Comment": "0xaf",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #3",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x30",
"cbz x5, #+0x28",
"ldr x21, [x11]",
"eor w27, w4, w21",
"subs x26, x4, x21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"ldr x20, [x11]",
"eor w27, w4, w20",
"subs x26, x4, x20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x8 (8)",
"b.eq #-0x24",
"b #+0x2c",
"cbz x5, #+0x28",
"ldr x20, [x11]",
"eor w27, w4, w20",
"subs x26, x4, x20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x8 (8)",
"b.eq #-0x24"
]
},
"repnz scasb": {
"ExpectedInstructionCount": 13,
"ExpectedInstructionCount": 29,
"Comment": "0xae",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x38",
"cbz x5, #+0x30",
"ldrb w21, [x11]",
"eor w27, w4, w21",
"ldrb w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #24",
"cmp w0, w21, lsl #24",
"sub w26, w4, w21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"cmp w0, w20, lsl #24",
"sub w26, w4, w20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x1 (1)",
"b.ne #-0x2c",
"b #+0x34",
"cbz x5, #+0x30",
"ldrb w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #24",
"cmp w0, w20, lsl #24",
"sub w26, w4, w20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x1 (1)",
"b.ne #-0x2c"
]
},
"repnz scasw": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 29,
"Comment": "0xaf",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #1",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x38",
"cbz x5, #+0x30",
"ldrh w21, [x11]",
"eor w27, w4, w21",
"ldrh w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #16",
"cmp w0, w21, lsl #16",
"sub w26, w4, w21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"cmp w0, w20, lsl #16",
"sub w26, w4, w20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x2 (2)",
"b.ne #-0x2c",
"b #+0x34",
"cbz x5, #+0x30",
"ldrh w20, [x11]",
"eor w27, w4, w20",
"lsl w0, w4, #16",
"cmp w0, w20, lsl #16",
"sub w26, w4, w20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x2 (2)",
"b.ne #-0x2c"
]
},
"repnz scasd": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 25,
"Comment": "0xaf",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #2",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x30",
"cbz x5, #+0x28",
"ldr w21, [x11]",
"eor w27, w4, w21",
"subs w26, w4, w21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"ldr w20, [x11]",
"eor w27, w4, w20",
"subs w26, w4, w20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x4 (4)",
"b.ne #-0x24",
"b #+0x2c",
"cbz x5, #+0x28",
"ldr w20, [x11]",
"eor w27, w4, w20",
"subs w26, w4, w20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x4 (4)",
"b.ne #-0x24"
]
},
"repnz scasq": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 25,
"Comment": "0xaf",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"lsl x20, x20, #3",
"lsr x20, x20, #63",
"cbz x20, #+0x8",
"b #+0x30",
"cbz x5, #+0x28",
"ldr x21, [x11]",
"eor w27, w4, w21",
"subs x26, x4, x21",
"mrs x21, nzcv",
"eor w21, w21, #0x20000000",
"msr nzcv, x21",
"ldr x20, [x11]",
"eor w27, w4, w20",
"subs x26, x4, x20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"add x11, x11, #0x8 (8)",
"b.ne #-0x24",
"b #+0x2c",
"cbz x5, #+0x28",
"ldr x20, [x11]",
"eor w27, w4, w20",
"subs x26, x4, x20",
"mrs x20, nzcv",
"eor w20, w20, #0x20000000",
"msr nzcv, x20",
"sub x5, x5, #0x1 (1)",
"sub x11, x11, #0x8 (8)",
"b.ne #-0x24"
]
},