Merge pull request #3154 from alyssarosenzweig/opt/smol-carry

Optimize 8/16-bit CF calculation
This commit is contained in:
Ryan Houdek 2023-09-26 05:49:07 -07:00 committed by GitHub
commit 8b3881b5db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 231 additions and 410 deletions

View File

@ -322,7 +322,6 @@ void OpDispatchBuilder::CallbackReturnOp(OpcodeArgs) {
}
void OpDispatchBuilder::SecondaryALUOp(OpcodeArgs) {
bool RequiresMask = false;
FEXCore::IR::IROps IROp;
#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg))
switch (Op->OP) {
@ -330,7 +329,6 @@ void OpDispatchBuilder::SecondaryALUOp(OpcodeArgs) {
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 0):
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 0):
IROp = FEXCore::IR::IROps::OP_ADD;
RequiresMask = true;
break;
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 1):
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 1):
@ -346,7 +344,6 @@ void OpDispatchBuilder::SecondaryALUOp(OpcodeArgs) {
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 5):
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 5):
IROp = FEXCore::IR::IROps::OP_SUB;
RequiresMask = true;
break;
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 6):
case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 6):
@ -411,11 +408,6 @@ void OpDispatchBuilder::SecondaryALUOp(OpcodeArgs) {
StoreResult(GPRClass, Op, Result, -1);
}
// Store result masks, but we need to
if (RequiresMask && Size < 4) {
Result = _Bfe(IR::SizeToOpSize(std::max<uint8_t>(4u, Size)), Size * 8, 0, Result);
}
// Flags set
{
switch (IROp) {
@ -1452,10 +1444,6 @@ void OpDispatchBuilder::CMPOp(OpcodeArgs) {
auto ALUOp = _Sub(Size == 8 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Src);
OrderedNode *Result = ALUOp;
if (Size < 4) {
Result = _Bfe(IR::SizeToOpSize(std::max<uint8_t>(4u, Size)), Size * 8, 0, ALUOp);
}
GenerateFlags_SUB(Op, Result, Dest, Src);
flagsOp = SelectionFlag::CMP;
@ -3427,10 +3415,6 @@ void OpDispatchBuilder::XADDOp(OpcodeArgs) {
// Calculated value gets stored in dst (order is important if dst is same as src)
StoreResult(GPRClass, Op, Result, -1);
if (Size < 32) {
Result = _Bfe(OpSize::i32Bit, Size, 0, Result);
}
GenerateFlags_ADD(Op, Result, Dest, Src);
}
else {
@ -3440,10 +3424,6 @@ void OpDispatchBuilder::XADDOp(OpcodeArgs) {
StoreResult(GPRClass, Op, Op->Src[0], Before, -1);
Result = _Add(OpSize, Before, Src); // Separate result just for flags
if (Size < 32) {
Result = _Bfe(OpSize::i32Bit, Size, 0, Result);
}
GenerateFlags_ADD(Op, Result, Before, Src);
}
}
@ -3859,9 +3839,6 @@ void OpDispatchBuilder::INCOp(OpcodeArgs) {
StoreResult(GPRClass, Op, Result, -1);
}
if (Size < 32) {
Result = _Bfe(OpSize::i32Bit, Size, 0, Result);
}
GenerateFlags_ADD(Op, Result, Dest, OneConst, false);
}
@ -3892,9 +3869,6 @@ void OpDispatchBuilder::DECOp(OpcodeArgs) {
if (!IsLocked) {
StoreResult(GPRClass, Op, Result, -1);
}
if (Size < 32) {
Result = _Bfe(OpSize::i32Bit, Size, 0, Result);
}
GenerateFlags_SUB(Op, Result, Dest, OneConst, false);
}
@ -4032,9 +4006,6 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
auto Src2 = _LoadMemAutoTSO(GPRClass, Size, Dest_RSI, Size);
OrderedNode* Result = _Sub(Size == 8 ? OpSize::i64Bit : OpSize::i32Bit, Src2, Src1);
if (Size < 4)
Result = _Bfe(OpSize::i32Bit, Size * 8, 0, Result);
GenerateFlags_SUB(Op, Result, Src2, Src1);
auto DF = GetRFLAG(FEXCore::X86State::RFLAG_DF_LOC);
@ -4094,9 +4065,6 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
auto Src2 = _LoadMem(GPRClass, Size, Dest_RSI, Size);
OrderedNode* Result = _Sub(Size == 8 ? OpSize::i64Bit : OpSize::i32Bit, Src2, Src1);
if (Size < 4)
Result = _Bfe(OpSize::i32Bit, Size * 8, 0, Result);
GenerateFlags_SUB(Op, Result, Src2, Src1);
// Calculate flags early.
@ -4259,8 +4227,6 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) {
auto Src2 = _LoadMemAutoTSO(GPRClass, Size, Dest_RDI, Size);
OrderedNode* Result = _Sub(Size == 8 ? OpSize::i64Bit : OpSize::i32Bit, Src1, Src2);
if (Size < 4)
Result = _Bfe(OpSize::i32Bit, Size * 8, 0, Result);
GenerateFlags_SUB(Op, Result, Src1, Src2);
auto SizeConst = _Constant(Size);
@ -4322,9 +4288,6 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) {
auto Src2 = _LoadMemAutoTSO(GPRClass, Size, Dest_RDI, Size);
OrderedNode* Result = _Sub(Size == 8 ? OpSize::i64Bit : OpSize::i32Bit, Src1, Src2);
if (Size < 4)
Result = _Bfe(OpSize::i32Bit, Size * 8, 0, Result);
GenerateFlags_SUB(Op, Result, Src1, Src2);
// Calculate flags early.
@ -4435,9 +4398,6 @@ void OpDispatchBuilder::NEGOp(OpcodeArgs) {
StoreResult(GPRClass, Op, Result, -1);
}
if (Size < 4)
Result = _Bfe(OpSize::i32Bit, Size * 8, 0, Result);
GenerateFlags_SUB(Op, Result, ZeroConst, Dest);
}
@ -4672,13 +4632,7 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) {
StoreResult(GPRClass, Op, DestResult, -1);
}
const auto Size = GetDstBitSize(Op);
OrderedNode *Result = _Sub(IR::SizeToOpSize(GPRSize), Src3Lower, CASResult);
if (Size < 32) {
Result = _Bfe(OpSize::i64Bit, Size, 0, Result);
}
GenerateFlags_SUB(Op, Result, Src3Lower, CASResult);
}
else {
@ -4721,10 +4675,6 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) {
const auto Size = GetDstBitSize(Op);
OrderedNode *Result = _Sub(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Src3Lower, CASResult);
if (Size < 32) {
Result = _Bfe(OpSize::i32Bit, Size, 0, Result);
}
GenerateFlags_SUB(Op, Result, Src3Lower, CASResult);
}
}
@ -5476,7 +5426,7 @@ void OpDispatchBuilder::MOVGPRNTOp(OpcodeArgs) {
StoreResult(GPRClass, Op, Src, 1, MemoryAccessType::ACCESS_STREAM);
}
void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp, bool RequiresMask) {
void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp) {
auto Size = GetDstSize(Op);
const auto OpSize = Size == 8 ? OpSize::i64Bit : OpSize::i32Bit;
@ -5527,10 +5477,6 @@ void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCor
StoreResult(GPRClass, Op, Result, -1);
}
if (RequiresMask && Size < 4) {
Result = _Bfe(OpSize::i32Bit, Size * 8, 0, Result);
}
// Flags set
{
switch (ALUIROp) {
@ -5551,9 +5497,9 @@ void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCor
}
}
template<FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp, bool RequiresMask>
template<FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp>
void OpDispatchBuilder::ALUOp(OpcodeArgs) {
ALUOpImpl(Op, ALUIROp, AtomicFetchOp, RequiresMask);
ALUOpImpl(Op, ALUIROp, AtomicFetchOp);
}
void OpDispatchBuilder::INTOp(OpcodeArgs) {
@ -6342,19 +6288,19 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
void InstallOpcodeHandlers(Context::OperatingMode Mode) {
constexpr std::tuple<uint8_t, uint8_t, X86Tables::OpDispatchPtr> BaseOpTable[] = {
// Instructions
{0x00, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_ADD, FEXCore::IR::IROps::OP_ATOMICFETCHADD, true>},
{0x00, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_ADD, FEXCore::IR::IROps::OP_ATOMICFETCHADD>},
{0x08, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_OR, FEXCore::IR::IROps::OP_ATOMICFETCHOR, false>},
{0x08, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_OR, FEXCore::IR::IROps::OP_ATOMICFETCHOR>},
{0x10, 6, &OpDispatchBuilder::ADCOp<0>},
{0x18, 6, &OpDispatchBuilder::SBBOp<0, true>},
{0x20, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_AND, FEXCore::IR::IROps::OP_ATOMICFETCHAND, false>},
{0x20, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_AND, FEXCore::IR::IROps::OP_ATOMICFETCHAND>},
{0x28, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_SUB, FEXCore::IR::IROps::OP_ATOMICFETCHSUB, true>},
{0x28, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_SUB, FEXCore::IR::IROps::OP_ATOMICFETCHSUB>},
{0x30, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_XOR, FEXCore::IR::IROps::OP_ATOMICFETCHXOR, false>},
{0x30, 6, &OpDispatchBuilder::ALUOp<FEXCore::IR::IROps::OP_XOR, FEXCore::IR::IROps::OP_ATOMICFETCHXOR>},
{0x38, 6, &OpDispatchBuilder::CMPOp<0>},
{0x50, 8, &OpDispatchBuilder::PUSHREGOp},

View File

@ -187,7 +187,7 @@ public:
void MOVGPRNTOp(OpcodeArgs);
void MOVVectorOp(OpcodeArgs);
void MOVVectorNTOp(OpcodeArgs);
template<FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp, bool RequiresMask>
template<FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp>
void ALUOp(OpcodeArgs);
void INTOp(OpcodeArgs);
void SyscallOp(OpcodeArgs);
@ -881,7 +881,7 @@ private:
// Used during new op bringup
bool ShouldDump{false};
void ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp, bool RequiresMask);
void ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp);
// Opcode helpers for generalizing behavior across VEX and non-VEX variants.

View File

@ -556,8 +556,6 @@ void OpDispatchBuilder::CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, Or
}
void OpDispatchBuilder::CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF) {
auto Zero = _Constant(0);
auto One = _Constant(1);
auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
CalculateAF(OpSize, Res, Src1, Src2);
@ -576,10 +574,9 @@ void OpDispatchBuilder::CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Res, Or
// CF
if (UpdateCF) {
auto SelectOp = _Select(FEXCore::IR::COND_ULT,
Src1, Src2, One, Zero);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(SelectOp);
// Grab carry bit from unmasked output.
auto Bfe = _Bfe(OpSize::i32Bit, 1, SrcSize * 8, Res);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(Bfe);
}
CalculateOF(SrcSize, Res, Src1, Src2, true);
@ -591,8 +588,6 @@ void OpDispatchBuilder::CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Res, Or
}
void OpDispatchBuilder::CalculateFlags_ADD(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF) {
auto Zero = _Constant(0);
auto One = _Constant(1);
auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
CalculateAF(OpSize, Res, Src1, Src2);
@ -610,9 +605,9 @@ void OpDispatchBuilder::CalculateFlags_ADD(uint8_t SrcSize, OrderedNode *Res, Or
// CF
if (UpdateCF) {
auto SelectOp = _Select(FEXCore::IR::COND_ULT, Res, Src2, One, Zero);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(SelectOp);
// Grab carry bit from unmasked output
auto Bfe = _Bfe(OpSize::i32Bit, 1, SrcSize * 8, Res);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(Bfe);
}
CalculateOF(SrcSize, Res, Src1, Src2, false);

View File

@ -9,7 +9,7 @@
},
"Instructions": {
"add bl, cl": {
"ExpectedInstructionCount": 20,
"ExpectedInstructionCount": 18,
"Optimal": "No",
"Comment": "0x00",
"ExpectedArm64ASM": [
@ -17,15 +17,13 @@
"uxtb w21, w7",
"add w22, w21, w20",
"bfxil x7, x22, #0, #8",
"uxtb w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #24",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w22, w20",
"cset x24, lo",
"ubfx w24, w22, #8, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -36,7 +34,7 @@
]
},
"add bx, cx": {
"ExpectedInstructionCount": 20,
"ExpectedInstructionCount": 18,
"Optimal": "No",
"Comment": "0x01",
"ExpectedArm64ASM": [
@ -44,15 +42,13 @@
"uxth w21, w7",
"add w22, w21, w20",
"bfxil x7, x22, #0, #16",
"uxth w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w22, w20",
"cset x24, lo",
"ubfx w24, w22, #16, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -94,7 +90,7 @@
]
},
"db 0x02, 0xcb": {
"ExpectedInstructionCount": 20,
"ExpectedInstructionCount": 18,
"Optimal": "No",
"Comment": [
"0x02",
@ -105,15 +101,13 @@
"uxtb w21, w5",
"add w22, w21, w20",
"bfxil x5, x22, #0, #8",
"uxtb w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #24",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w22, w20",
"cset x24, lo",
"ubfx w24, w22, #8, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -124,7 +118,7 @@
]
},
"db 0x66, 0x03, 0xcb": {
"ExpectedInstructionCount": 20,
"ExpectedInstructionCount": 18,
"Optimal": "No",
"Comment": [
"0x03",
@ -135,15 +129,13 @@
"uxth w21, w5",
"add w22, w21, w20",
"bfxil x5, x22, #0, #16",
"uxth w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w22, w20",
"cset x24, lo",
"ubfx w24, w22, #16, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -191,21 +183,19 @@
]
},
"add al, 1": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": "0x04",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"add w21, w20, #0x1 (1)",
"bfxil x4, x21, #0, #8",
"uxtb w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #24",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w21, #0x1 (1)",
"cset x23, lo",
"ubfx w23, w21, #8, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w21, w20",
"ubfx w20, w20, #7, #1",
@ -214,21 +204,19 @@
]
},
"add ax, 1": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": "0x05",
"ExpectedArm64ASM": [
"uxth w20, w4",
"add w21, w20, #0x1 (1)",
"bfxil x4, x21, #0, #16",
"uxth w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #16",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w21, #0x1 (1)",
"cset x23, lo",
"ubfx w23, w21, #16, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w21, w20",
"ubfx w20, w20, #15, #1",
@ -265,22 +253,20 @@
]
},
"add al, -1": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 15,
"Optimal": "No",
"Comment": "0x04",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"add w21, w20, #0xff (255)",
"bfxil x4, x21, #0, #8",
"uxtb w21, w21",
"eor w22, w20, #0xff",
"strb w22, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #24",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w21, #0xff (255)",
"cset x23, lo",
"ubfx w23, w21, #8, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w20, w21",
"ubfx w20, w20, #7, #1",
@ -289,27 +275,25 @@
]
},
"add ax, -1": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "0x05",
"ExpectedArm64ASM": [
"mov w20, #0xffff",
"uxth w21, w4",
"add w22, w21, w20",
"bfxil x4, x22, #0, #16",
"uxth w22, w22",
"eor w23, w21, #0xffff",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w22, w20",
"cset x20, lo",
"orr w20, w23, w20, lsl #29",
"bic w21, w21, w22",
"ubfx w21, w21, #15, #1",
"orr w20, w20, w21, lsl #28",
"add w20, w21, w20",
"bfxil x4, x20, #0, #16",
"eor w22, w21, #0xffff",
"strb w22, [x28, #708]",
"strb w20, [x28, #706]",
"lsl w22, w20, #16",
"tst w22, w22",
"mrs x22, nzcv",
"ubfx w23, w20, #16, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w21, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
]
},
@ -1851,7 +1835,7 @@
]
},
"sub bl, cl": {
"ExpectedInstructionCount": 20,
"ExpectedInstructionCount": 18,
"Optimal": "No",
"Comment": "0x28",
"ExpectedArm64ASM": [
@ -1859,15 +1843,13 @@
"uxtb w21, w7",
"sub w22, w21, w20",
"bfxil x7, x22, #0, #8",
"uxtb w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #24",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w21, w20",
"cset x24, lo",
"ubfx w24, w22, #8, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -1878,7 +1860,7 @@
]
},
"sub bx, cx": {
"ExpectedInstructionCount": 20,
"ExpectedInstructionCount": 18,
"Optimal": "No",
"Comment": "0x29",
"ExpectedArm64ASM": [
@ -1886,15 +1868,13 @@
"uxth w21, w7",
"sub w22, w21, w20",
"bfxil x7, x22, #0, #16",
"uxth w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w21, w20",
"cset x24, lo",
"ubfx w24, w22, #16, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -1938,7 +1918,7 @@
]
},
"db 0x2A, 0xcb": {
"ExpectedInstructionCount": 20,
"ExpectedInstructionCount": 18,
"Optimal": "No",
"Comment": [
"0x2A",
@ -1949,15 +1929,13 @@
"uxtb w21, w5",
"sub w22, w21, w20",
"bfxil x5, x22, #0, #8",
"uxtb w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #24",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w21, w20",
"cset x24, lo",
"ubfx w24, w22, #8, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -1968,7 +1946,7 @@
]
},
"db 0x66, 0x2B, 0xcb": {
"ExpectedInstructionCount": 20,
"ExpectedInstructionCount": 18,
"Optimal": "No",
"Comment": [
"0x2B",
@ -1979,15 +1957,13 @@
"uxth w21, w5",
"sub w22, w21, w20",
"bfxil x5, x22, #0, #16",
"uxth w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w21, w20",
"cset x24, lo",
"ubfx w24, w22, #16, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -2037,21 +2013,19 @@
]
},
"sub al, 1": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": "0x2C",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"sub w21, w20, #0x1 (1)",
"bfxil x4, x21, #0, #8",
"uxtb w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #24",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w20, #0x1 (1)",
"cset x23, lo",
"ubfx w23, w21, #8, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w20, w21",
"ubfx w20, w20, #7, #1",
@ -2060,21 +2034,19 @@
]
},
"sub ax, 1": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": "0x2D",
"ExpectedArm64ASM": [
"uxth w20, w4",
"sub w21, w20, #0x1 (1)",
"bfxil x4, x21, #0, #16",
"uxth w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #16",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w20, #0x1 (1)",
"cset x23, lo",
"ubfx w23, w21, #16, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w20, w21",
"ubfx w20, w20, #15, #1",
@ -2113,22 +2085,20 @@
]
},
"sub al, -1": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 15,
"Optimal": "No",
"Comment": "0x2C",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"sub w21, w20, #0xff (255)",
"bfxil x4, x21, #0, #8",
"uxtb w21, w21",
"eor w22, w20, #0xff",
"strb w22, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #24",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w20, #0xff (255)",
"cset x23, lo",
"ubfx w23, w21, #8, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w21, w20",
"ubfx w20, w20, #7, #1",
@ -2137,27 +2107,25 @@
]
},
"sub ax, -1": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "0x2D",
"ExpectedArm64ASM": [
"mov w20, #0xffff",
"uxth w21, w4",
"sub w22, w21, w20",
"bfxil x4, x22, #0, #16",
"uxth w22, w22",
"eor w23, w21, #0xffff",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w21, w20",
"cset x20, lo",
"orr w20, w23, w20, lsl #29",
"bic w21, w22, w21",
"ubfx w21, w21, #15, #1",
"orr w20, w20, w21, lsl #28",
"sub w20, w21, w20",
"bfxil x4, x20, #0, #16",
"eor w22, w21, #0xffff",
"strb w22, [x28, #708]",
"strb w20, [x28, #706]",
"lsl w22, w20, #16",
"tst w22, w22",
"mrs x22, nzcv",
"ubfx w23, w20, #16, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w20, w21",
"ubfx w20, w20, #15, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
]
},
@ -2380,22 +2348,20 @@
]
},
"cmp bl, cl": {
"ExpectedInstructionCount": 19,
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": "0x38",
"ExpectedArm64ASM": [
"uxtb w20, w5",
"uxtb w21, w7",
"sub w22, w21, w20",
"uxtb w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #24",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w21, w20",
"cset x24, lo",
"ubfx w24, w22, #8, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -2464,22 +2430,20 @@
]
},
"cmp bx, cx": {
"ExpectedInstructionCount": 19,
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": "0x39",
"ExpectedArm64ASM": [
"uxth w20, w5",
"uxth w21, w7",
"sub w22, w21, w20",
"uxth w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w21, w20",
"cset x24, lo",
"ubfx w24, w22, #16, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -2522,7 +2486,7 @@
]
},
"db 0x3A, 0xcb": {
"ExpectedInstructionCount": 19,
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": [
"0x3A",
@ -2532,15 +2496,13 @@
"uxtb w20, w7",
"uxtb w21, w5",
"sub w22, w21, w20",
"uxtb w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #24",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w21, w20",
"cset x24, lo",
"ubfx w24, w22, #8, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -2551,7 +2513,7 @@
]
},
"db 0x66, 0x3B, 0xcb": {
"ExpectedInstructionCount": 19,
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": [
"0x3B",
@ -2561,15 +2523,13 @@
"uxth w20, w7",
"uxth w21, w5",
"sub w22, w21, w20",
"uxth w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w21, w20",
"cset x24, lo",
"ubfx w24, w22, #16, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -2618,20 +2578,18 @@
]
},
"cmp al, 1": {
"ExpectedInstructionCount": 15,
"ExpectedInstructionCount": 13,
"Optimal": "No",
"Comment": "0x3C",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"sub w21, w20, #0x1 (1)",
"uxtb w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #24",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w20, #0x1 (1)",
"cset x23, lo",
"ubfx w23, w21, #8, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w20, w21",
"ubfx w20, w20, #7, #1",
@ -2640,20 +2598,18 @@
]
},
"cmp ax, 1": {
"ExpectedInstructionCount": 15,
"ExpectedInstructionCount": 13,
"Optimal": "No",
"Comment": "0x3D",
"ExpectedArm64ASM": [
"uxth w20, w4",
"sub w21, w20, #0x1 (1)",
"uxth w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #16",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w20, #0x1 (1)",
"cset x23, lo",
"ubfx w23, w21, #16, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w20, w21",
"ubfx w20, w20, #15, #1",
@ -2691,21 +2647,19 @@
]
},
"cmp al, -1": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": "0x3C",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"sub w21, w20, #0xff (255)",
"uxtb w21, w21",
"eor w22, w20, #0xff",
"strb w22, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #24",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w20, #0xff (255)",
"cset x23, lo",
"ubfx w23, w21, #8, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w21, w20",
"ubfx w20, w20, #7, #1",
@ -2714,26 +2668,24 @@
]
},
"cmp ax, -1": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 15,
"Optimal": "No",
"Comment": "0x3D",
"ExpectedArm64ASM": [
"mov w20, #0xffff",
"uxth w21, w4",
"sub w22, w21, w20",
"uxth w22, w22",
"eor w23, w21, #0xffff",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w21, w20",
"cset x20, lo",
"orr w20, w23, w20, lsl #29",
"bic w21, w22, w21",
"ubfx w21, w21, #15, #1",
"orr w20, w20, w21, lsl #28",
"sub w20, w21, w20",
"eor w22, w21, #0xffff",
"strb w22, [x28, #708]",
"strb w20, [x28, #706]",
"lsl w22, w20, #16",
"tst w22, w22",
"mrs x22, nzcv",
"ubfx w23, w20, #16, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w20, w21",
"ubfx w20, w20, #15, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
]
},
@ -4089,7 +4041,7 @@
]
},
"cmpsb": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Optimal": "No",
"Comment": [
"Direction flag increment/decrement location can be made with a single sbfx",
@ -4099,7 +4051,6 @@
"ldrb w20, [x11]",
"ldrb w21, [x10]",
"sub w22, w21, w20",
"uxtb w22, w22",
"ldrb w23, [x28, #714]",
"mov w24, #0x1",
"mov x25, #0xffffffffffffffff",
@ -4113,8 +4064,7 @@
"lsl w23, w22, #24",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w21, w20",
"cset x24, lo",
"ubfx w24, w22, #8, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -4125,7 +4075,7 @@
]
},
"cmpsw": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Optimal": "No",
"Comment": [
"Direction flag increment/decrement location can be made with a tst+mov+csel triple",
@ -4135,7 +4085,6 @@
"ldrh w20, [x11]",
"ldrh w21, [x10]",
"sub w22, w21, w20",
"uxth w22, w22",
"ldrb w23, [x28, #714]",
"mov w24, #0x2",
"mov x25, #0xfffffffffffffffe",
@ -4149,8 +4098,7 @@
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w21, w20",
"cset x24, lo",
"ubfx w24, w22, #16, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -4215,7 +4163,7 @@
]
},
"repz cmpsb": {
"ExpectedInstructionCount": 30,
"ExpectedInstructionCount": 28,
"Optimal": "No",
"Comment": "0xa6",
"ExpectedArm64ASM": [
@ -4224,19 +4172,17 @@
"mov x22, #0xffffffffffffffff",
"cmp x20, #0x0 (0)",
"csel x20, x21, x22, eq",
"cbz x5, #+0x64",
"cbz x5, #+0x5c",
"ldrb w21, [x11]",
"ldrb w22, [x10]",
"sub w23, w22, w21",
"uxtb w23, w23",
"eor w24, w22, w21",
"strb w24, [x28, #708]",
"strb w23, [x28, #706]",
"lsl w24, w23, #24",
"tst w24, w24",
"mrs x24, nzcv",
"cmp w22, w21",
"cset x25, lo",
"ubfx w25, w23, #8, #1",
"orr w24, w24, w25, lsl #29",
"eor w21, w22, w21",
"eor w22, w23, w22",
@ -4248,11 +4194,11 @@
"add x11, x11, x20",
"add x10, x10, x20",
"ubfx w21, w21, #30, #1",
"cbnz w21, #-0x60"
"cbnz w21, #-0x58"
]
},
"repz cmpsw": {
"ExpectedInstructionCount": 30,
"ExpectedInstructionCount": 28,
"Optimal": "No",
"Comment": "0xa7",
"ExpectedArm64ASM": [
@ -4261,19 +4207,17 @@
"mov x22, #0xfffffffffffffffe",
"cmp x20, #0x0 (0)",
"csel x20, x21, x22, eq",
"cbz x5, #+0x64",
"cbz x5, #+0x5c",
"ldrh w21, [x11]",
"ldrh w22, [x10]",
"sub w23, w22, w21",
"uxth w23, w23",
"eor w24, w22, w21",
"strb w24, [x28, #708]",
"strb w23, [x28, #706]",
"lsl w24, w23, #16",
"tst w24, w24",
"mrs x24, nzcv",
"cmp w22, w21",
"cset x25, lo",
"ubfx w25, w23, #16, #1",
"orr w24, w24, w25, lsl #29",
"eor w21, w22, w21",
"eor w22, w23, w22",
@ -4285,7 +4229,7 @@
"add x11, x11, x20",
"add x10, x10, x20",
"ubfx w21, w21, #30, #1",
"cbnz w21, #-0x60"
"cbnz w21, #-0x58"
]
},
"repz cmpsd": {
@ -4345,7 +4289,7 @@
]
},
"repnz cmpsb": {
"ExpectedInstructionCount": 30,
"ExpectedInstructionCount": 28,
"Optimal": "No",
"Comment": "0xa6",
"ExpectedArm64ASM": [
@ -4354,19 +4298,17 @@
"mov x22, #0xffffffffffffffff",
"cmp x20, #0x0 (0)",
"csel x20, x21, x22, eq",
"cbz x5, #+0x64",
"cbz x5, #+0x5c",
"ldrb w21, [x11]",
"ldrb w22, [x10]",
"sub w23, w22, w21",
"uxtb w23, w23",
"eor w24, w22, w21",
"strb w24, [x28, #708]",
"strb w23, [x28, #706]",
"lsl w24, w23, #24",
"tst w24, w24",
"mrs x24, nzcv",
"cmp w22, w21",
"cset x25, lo",
"ubfx w25, w23, #8, #1",
"orr w24, w24, w25, lsl #29",
"eor w21, w22, w21",
"eor w22, w23, w22",
@ -4378,11 +4320,11 @@
"add x11, x11, x20",
"add x10, x10, x20",
"ubfx w21, w21, #30, #1",
"cbz w21, #-0x60"
"cbz w21, #-0x58"
]
},
"repnz cmpsw": {
"ExpectedInstructionCount": 30,
"ExpectedInstructionCount": 28,
"Optimal": "No",
"Comment": "0xa7",
"ExpectedArm64ASM": [
@ -4391,19 +4333,17 @@
"mov x22, #0xfffffffffffffffe",
"cmp x20, #0x0 (0)",
"csel x20, x21, x22, eq",
"cbz x5, #+0x64",
"cbz x5, #+0x5c",
"ldrh w21, [x11]",
"ldrh w22, [x10]",
"sub w23, w22, w21",
"uxth w23, w23",
"eor w24, w22, w21",
"strb w24, [x28, #708]",
"strb w23, [x28, #706]",
"lsl w24, w23, #16",
"tst w24, w24",
"mrs x24, nzcv",
"cmp w22, w21",
"cset x25, lo",
"ubfx w25, w23, #16, #1",
"orr w24, w24, w25, lsl #29",
"eor w21, w22, w21",
"eor w22, w23, w22",
@ -4415,7 +4355,7 @@
"add x11, x11, x20",
"add x10, x10, x20",
"ubfx w21, w21, #30, #1",
"cbz w21, #-0x60"
"cbz w21, #-0x58"
]
},
"repnz cmpsd": {
@ -4870,14 +4810,13 @@
]
},
"scasb": {
"ExpectedInstructionCount": 25,
"ExpectedInstructionCount": 23,
"Optimal": "No",
"Comment": "0xae",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"ldrb w21, [x11]",
"sub w22, w20, w21",
"uxtb w22, w22",
"mov w23, #0x1",
"mov x24, #0xffffffffffffffff",
"ldrb w25, [x28, #714]",
@ -4890,8 +4829,7 @@
"lsl w23, w22, #24",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w20, w21",
"cset x24, lo",
"ubfx w24, w22, #8, #1",
"orr w23, w23, w24, lsl #29",
"eor w21, w20, w21",
"eor w20, w22, w20",
@ -4902,14 +4840,13 @@
]
},
"scasw": {
"ExpectedInstructionCount": 25,
"ExpectedInstructionCount": 23,
"Optimal": "No",
"Comment": "0xaf",
"ExpectedArm64ASM": [
"uxth w20, w4",
"ldrh w21, [x11]",
"sub w22, w20, w21",
"uxth w22, w22",
"mov w23, #0x2",
"mov x24, #0xfffffffffffffffe",
"ldrb w25, [x28, #714]",
@ -4922,8 +4859,7 @@
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w20, w21",
"cset x24, lo",
"ubfx w24, w22, #16, #1",
"orr w23, w23, w24, lsl #29",
"eor w21, w20, w21",
"eor w20, w22, w20",
@ -4979,7 +4915,7 @@
]
},
"repz scasb": {
"ExpectedInstructionCount": 29,
"ExpectedInstructionCount": 27,
"Optimal": "No",
"Comment": "0xae",
"ExpectedArm64ASM": [
@ -4988,19 +4924,17 @@
"ldrb w22, [x28, #714]",
"cmp x22, #0x0 (0)",
"csel x20, x20, x21, eq",
"cbz x5, #+0x60",
"cbz x5, #+0x58",
"uxtb w21, w4",
"ldrb w22, [x11]",
"sub w23, w21, w22",
"uxtb w23, w23",
"eor w24, w21, w22",
"strb w24, [x28, #708]",
"strb w23, [x28, #706]",
"lsl w24, w23, #24",
"tst w24, w24",
"mrs x24, nzcv",
"cmp w21, w22",
"cset x25, lo",
"ubfx w25, w23, #8, #1",
"orr w24, w24, w25, lsl #29",
"eor w22, w21, w22",
"eor w21, w23, w21",
@ -5011,11 +4945,11 @@
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"ubfx w21, w21, #30, #1",
"cbnz w21, #-0x5c"
"cbnz w21, #-0x54"
]
},
"repz scasw": {
"ExpectedInstructionCount": 29,
"ExpectedInstructionCount": 27,
"Optimal": "No",
"Comment": "0xaf",
"ExpectedArm64ASM": [
@ -5024,19 +4958,17 @@
"ldrb w22, [x28, #714]",
"cmp x22, #0x0 (0)",
"csel x20, x20, x21, eq",
"cbz x5, #+0x60",
"cbz x5, #+0x58",
"uxth w21, w4",
"ldrh w22, [x11]",
"sub w23, w21, w22",
"uxth w23, w23",
"eor w24, w21, w22",
"strb w24, [x28, #708]",
"strb w23, [x28, #706]",
"lsl w24, w23, #16",
"tst w24, w24",
"mrs x24, nzcv",
"cmp w21, w22",
"cset x25, lo",
"ubfx w25, w23, #16, #1",
"orr w24, w24, w25, lsl #29",
"eor w22, w21, w22",
"eor w21, w23, w21",
@ -5047,7 +4979,7 @@
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"ubfx w21, w21, #30, #1",
"cbnz w21, #-0x5c"
"cbnz w21, #-0x54"
]
},
"repz scasd": {
@ -5104,7 +5036,7 @@
]
},
"repnz scasb": {
"ExpectedInstructionCount": 29,
"ExpectedInstructionCount": 27,
"Optimal": "No",
"Comment": "0xae",
"ExpectedArm64ASM": [
@ -5113,19 +5045,17 @@
"ldrb w22, [x28, #714]",
"cmp x22, #0x0 (0)",
"csel x20, x20, x21, eq",
"cbz x5, #+0x60",
"cbz x5, #+0x58",
"uxtb w21, w4",
"ldrb w22, [x11]",
"sub w23, w21, w22",
"uxtb w23, w23",
"eor w24, w21, w22",
"strb w24, [x28, #708]",
"strb w23, [x28, #706]",
"lsl w24, w23, #24",
"tst w24, w24",
"mrs x24, nzcv",
"cmp w21, w22",
"cset x25, lo",
"ubfx w25, w23, #8, #1",
"orr w24, w24, w25, lsl #29",
"eor w22, w21, w22",
"eor w21, w23, w21",
@ -5136,11 +5066,11 @@
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"ubfx w21, w21, #30, #1",
"cbz w21, #-0x5c"
"cbz w21, #-0x54"
]
},
"repnz scasw": {
"ExpectedInstructionCount": 29,
"ExpectedInstructionCount": 27,
"Optimal": "No",
"Comment": "0xaf",
"ExpectedArm64ASM": [
@ -5149,19 +5079,17 @@
"ldrb w22, [x28, #714]",
"cmp x22, #0x0 (0)",
"csel x20, x20, x21, eq",
"cbz x5, #+0x60",
"cbz x5, #+0x58",
"uxth w21, w4",
"ldrh w22, [x11]",
"sub w23, w21, w22",
"uxth w23, w23",
"eor w24, w21, w22",
"strb w24, [x28, #708]",
"strb w23, [x28, #706]",
"lsl w24, w23, #16",
"tst w24, w24",
"mrs x24, nzcv",
"cmp w21, w22",
"cset x25, lo",
"ubfx w25, w23, #16, #1",
"orr w24, w24, w25, lsl #29",
"eor w22, w21, w22",
"eor w21, w23, w21",
@ -5172,7 +5100,7 @@
"sub x5, x5, #0x1 (1)",
"add x11, x11, x20",
"ubfx w21, w21, #30, #1",
"cbz w21, #-0x5c"
"cbz w21, #-0x54"
]
},
"repnz scasd": {

View File

@ -13,21 +13,19 @@
],
"Instructions": {
"add al, 1": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": "GROUP1 0x80 /0",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"add w21, w20, #0x1 (1)",
"bfxil x4, x21, #0, #8",
"uxtb w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #24",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w21, #0x1 (1)",
"cset x23, lo",
"ubfx w23, w21, #8, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w21, w20",
"ubfx w20, w20, #7, #1",
@ -128,21 +126,19 @@
]
},
"sub al, 1": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": "GROUP1 0x80 /5",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"sub w21, w20, #0x1 (1)",
"bfxil x4, x21, #0, #8",
"uxtb w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #24",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w20, #0x1 (1)",
"cset x23, lo",
"ubfx w23, w21, #8, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w20, w21",
"ubfx w20, w20, #7, #1",
@ -166,20 +162,18 @@
]
},
"cmp al, 1": {
"ExpectedInstructionCount": 15,
"ExpectedInstructionCount": 13,
"Optimal": "No",
"Comment": "GROUP1 0x80 /7",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"sub w21, w20, #0x1 (1)",
"uxtb w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #24",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w20, #0x1 (1)",
"cset x23, lo",
"ubfx w23, w21, #8, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w20, w21",
"ubfx w20, w20, #7, #1",
@ -188,22 +182,20 @@
]
},
"add al, -1": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 15,
"Optimal": "No",
"Comment": "GROUP1 0x80 /0",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"add w21, w20, #0xff (255)",
"bfxil x4, x21, #0, #8",
"uxtb w21, w21",
"eor w22, w20, #0xff",
"strb w22, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #24",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w21, #0xff (255)",
"cset x23, lo",
"ubfx w23, w21, #8, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w20, w21",
"ubfx w20, w20, #7, #1",
@ -306,22 +298,20 @@
]
},
"sub al, -1": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 15,
"Optimal": "No",
"Comment": "GROUP1 0x80 /5",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"sub w21, w20, #0xff (255)",
"bfxil x4, x21, #0, #8",
"uxtb w21, w21",
"eor w22, w20, #0xff",
"strb w22, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #24",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w20, #0xff (255)",
"cset x23, lo",
"ubfx w23, w21, #8, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w21, w20",
"ubfx w20, w20, #7, #1",
@ -345,21 +335,19 @@
]
},
"cmp al, -1": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": "GROUP1 0x80 /7",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"sub w21, w20, #0xff (255)",
"uxtb w21, w21",
"eor w22, w20, #0xff",
"strb w22, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #24",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w20, #0xff (255)",
"cset x23, lo",
"ubfx w23, w21, #8, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w21, w20",
"ubfx w20, w20, #7, #1",
@ -368,21 +356,19 @@
]
},
"add ax, 256": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": "GROUP1 0x81 /0",
"ExpectedArm64ASM": [
"uxth w20, w4",
"add w21, w20, #0x100 (256)",
"bfxil x4, x21, #0, #16",
"uxth w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #16",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w21, #0x100 (256)",
"cset x23, lo",
"ubfx w23, w21, #16, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w21, w20",
"ubfx w20, w20, #15, #1",
@ -665,26 +651,24 @@
]
},
"add ax, -256": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 15,
"Optimal": "No",
"Comment": "GROUP1 0x81 /0",
"ExpectedArm64ASM": [
"mov w20, #0xff00",
"uxth w21, w4",
"add w22, w21, w20",
"bfxil x4, x22, #0, #16",
"uxth w22, w22",
"add w20, w21, w20",
"bfxil x4, x20, #0, #16",
"strb w21, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w22, w20",
"cset x20, lo",
"orr w20, w23, w20, lsl #29",
"bic w21, w21, w22",
"ubfx w21, w21, #15, #1",
"orr w20, w20, w21, lsl #28",
"strb w20, [x28, #706]",
"lsl w22, w20, #16",
"tst w22, w22",
"mrs x22, nzcv",
"ubfx w23, w20, #16, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w21, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
]
},
@ -969,21 +953,19 @@
]
},
"add ax, 1": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": "GROUP1 0x83 /0",
"ExpectedArm64ASM": [
"uxth w20, w4",
"add w21, w20, #0x1 (1)",
"bfxil x4, x21, #0, #16",
"uxth w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #16",
"tst w22, w22",
"mrs x22, nzcv",
"cmp w21, #0x1 (1)",
"cset x23, lo",
"ubfx w23, w21, #16, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w21, w20",
"ubfx w20, w20, #15, #1",
@ -1266,27 +1248,25 @@
]
},
"add ax, -1": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "GROUP1 0x83 /0",
"ExpectedArm64ASM": [
"mov w20, #0xffff",
"uxth w21, w4",
"add w22, w21, w20",
"bfxil x4, x22, #0, #16",
"uxth w22, w22",
"eor w23, w21, #0xffff",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w22, w20",
"cset x20, lo",
"orr w20, w23, w20, lsl #29",
"bic w21, w21, w22",
"ubfx w21, w21, #15, #1",
"orr w20, w20, w21, lsl #28",
"add w20, w21, w20",
"bfxil x4, x20, #0, #16",
"eor w22, w21, #0xffff",
"strb w22, [x28, #708]",
"strb w20, [x28, #706]",
"lsl w22, w20, #16",
"tst w22, w22",
"mrs x22, nzcv",
"ubfx w23, w20, #16, #1",
"orr w22, w22, w23, lsl #29",
"bic w20, w21, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
]
},
@ -3550,26 +3530,23 @@
]
},
"neg bl": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": "GROUP2 0xf6 /3",
"ExpectedArm64ASM": [
"mov w20, #0x0",
"uxtb w21, w7",
"neg w22, w21",
"bfxil x7, x22, #0, #8",
"uxtb w22, w22",
"strb w21, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #24",
"tst w23, w23",
"mrs x23, nzcv",
"cmp x20, x21",
"cset x20, lo",
"orr w20, w23, w20, lsl #29",
"and w21, w22, w21",
"ubfx w21, w21, #7, #1",
"orr w20, w20, w21, lsl #28",
"uxtb w20, w7",
"neg w21, w20",
"bfxil x7, x21, #0, #8",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #24",
"tst w22, w22",
"mrs x22, nzcv",
"ubfx w23, w21, #8, #1",
"orr w22, w22, w23, lsl #29",
"and w20, w21, w20",
"ubfx w20, w20, #7, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
]
},
@ -3758,26 +3735,23 @@
]
},
"neg bx": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": "GROUP2 0xf7 /2",
"ExpectedArm64ASM": [
"mov w20, #0x0",
"uxth w21, w7",
"neg w22, w21",
"bfxil x7, x22, #0, #16",
"uxth w22, w22",
"strb w21, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp x20, x21",
"cset x20, lo",
"orr w20, w23, w20, lsl #29",
"and w21, w22, w21",
"ubfx w21, w21, #15, #1",
"orr w20, w20, w21, lsl #28",
"uxth w20, w7",
"neg w21, w20",
"bfxil x7, x21, #0, #16",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"lsl w22, w21, #16",
"tst w22, w22",
"mrs x22, nzcv",
"ubfx w23, w21, #16, #1",
"orr w22, w22, w23, lsl #29",
"and w20, w21, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
]
},
@ -4065,14 +4039,13 @@
]
},
"inc al": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 15,
"Optimal": "No",
"Comment": "GROUP3 0xfe /0",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"add w21, w20, #0x1 (1)",
"bfxil x4, x21, #0, #8",
"uxtb w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"ldr w22, [x28, #728]",
@ -4088,14 +4061,13 @@
]
},
"dec al": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 15,
"Optimal": "No",
"Comment": "GROUP3 0xfe /1",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"sub w21, w20, #0x1 (1)",
"bfxil x4, x21, #0, #8",
"uxtb w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"ldr w22, [x28, #728]",
@ -4111,14 +4083,13 @@
]
},
"inc ax": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 15,
"Optimal": "No",
"Comment": "GROUP4 0xfe /0",
"ExpectedArm64ASM": [
"uxth w20, w4",
"add w21, w20, #0x1 (1)",
"bfxil x4, x21, #0, #16",
"uxth w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"ldr w22, [x28, #728]",
@ -4168,14 +4139,13 @@
]
},
"dec ax": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 15,
"Optimal": "No",
"Comment": "GROUP4 0xfe /1",
"ExpectedArm64ASM": [
"uxth w20, w4",
"sub w21, w20, #0x1 (1)",
"bfxil x4, x21, #0, #16",
"uxth w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"ldr w22, [x28, #728]",

View File

@ -286,14 +286,13 @@
]
},
"inc ax": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 15,
"Optimal": "No",
"Comment": "0x40",
"ExpectedArm64ASM": [
"uxth w20, w4",
"add w21, w20, #0x1 (1)",
"bfxil w4, w21, #0, #16",
"uxth w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"ldr w22, [x28, #728]",
@ -326,14 +325,13 @@
]
},
"dec ax": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 15,
"Optimal": "No",
"Comment": "0x48",
"ExpectedArm64ASM": [
"uxth w20, w4",
"sub w21, w20, #0x1 (1)",
"bfxil w4, w21, #0, #16",
"uxth w21, w21",
"strb w20, [x28, #708]",
"strb w21, [x28, #706]",
"ldr w22, [x28, #728]",

View File

@ -2292,7 +2292,7 @@
]
},
"cmpxchg al, bl": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Optimal": "No",
"Comment": "0x0f 0xb0",
"ExpectedArm64ASM": [
@ -2306,15 +2306,13 @@
"bfxil x4, x23, #0, #8",
"bfxil x4, x20, #0, #8",
"sub x20, x22, x23",
"uxtb x20, w20",
"eor w21, w22, w23",
"strb w21, [x28, #708]",
"strb w20, [x28, #706]",
"lsl w21, w20, #24",
"tst w21, w21",
"mrs x21, nzcv",
"cmp x22, x23",
"cset x24, lo",
"ubfx w24, w20, #8, #1",
"orr w21, w21, w24, lsl #29",
"eor w23, w22, w23",
"eor w20, w20, w22",
@ -2325,7 +2323,7 @@
]
},
"cmpxchg [rax], bl": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 21,
"Optimal": "No",
"Comment": "0x0f 0xb0",
"ExpectedArm64ASM": [
@ -2336,15 +2334,13 @@
"mov w20, w1",
"bfxil x4, x20, #0, #8",
"sub w22, w21, w20",
"uxtb w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #24",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w21, w20",
"cset x24, lo",
"ubfx w24, w22, #8, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -2355,7 +2351,7 @@
]
},
"cmpxchg ax, bx": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Optimal": "No",
"Comment": "0x0f 0xb1",
"ExpectedArm64ASM": [
@ -2369,15 +2365,13 @@
"bfxil x4, x23, #0, #16",
"bfxil x4, x20, #0, #16",
"sub x20, x22, x23",
"uxth x20, w20",
"eor w21, w22, w23",
"strb w21, [x28, #708]",
"strb w20, [x28, #706]",
"lsl w21, w20, #16",
"tst w21, w21",
"mrs x21, nzcv",
"cmp x22, x23",
"cset x24, lo",
"ubfx w24, w20, #16, #1",
"orr w21, w21, w24, lsl #29",
"eor w23, w22, w23",
"eor w20, w20, w22",
@ -2388,7 +2382,7 @@
]
},
"cmpxchg [rax], bx": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 21,
"Optimal": "No",
"Comment": "0x0f 0xb1",
"ExpectedArm64ASM": [
@ -2399,15 +2393,13 @@
"mov w20, w1",
"bfxil x4, x20, #0, #16",
"sub w22, w21, w20",
"uxth w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w21, w20",
"cset x24, lo",
"ubfx w24, w22, #16, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -3112,7 +3104,7 @@
]
},
"xadd al, bl": {
"ExpectedInstructionCount": 21,
"ExpectedInstructionCount": 19,
"Optimal": "No",
"Comment": "0x0f 0xc0",
"ExpectedArm64ASM": [
@ -3121,15 +3113,13 @@
"add w22, w20, w21",
"bfxil x7, x20, #0, #8",
"bfxil x4, x22, #0, #8",
"uxtb w22, w22",
"eor w23, w20, w21",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #24",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w22, w21",
"cset x24, lo",
"ubfx w24, w22, #8, #1",
"orr w23, w23, w24, lsl #29",
"eor w21, w20, w21",
"eor w20, w22, w20",
@ -3140,7 +3130,7 @@
]
},
"xadd [rax], bl": {
"ExpectedInstructionCount": 20,
"ExpectedInstructionCount": 18,
"Optimal": "No",
"Comment": "0x0f 0xc0",
"ExpectedArm64ASM": [
@ -3148,15 +3138,13 @@
"ldaddalb w20, w21, [x4]",
"bfxil x7, x21, #0, #8",
"add w22, w21, w20",
"uxtb w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #24",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w22, w20",
"cset x24, lo",
"ubfx w24, w22, #8, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
@ -3167,7 +3155,7 @@
]
},
"xadd ax, bx": {
"ExpectedInstructionCount": 21,
"ExpectedInstructionCount": 19,
"Optimal": "No",
"Comment": "0x0f 0xc1",
"ExpectedArm64ASM": [
@ -3176,15 +3164,13 @@
"add w22, w20, w21",
"bfxil x7, x20, #0, #16",
"bfxil x4, x22, #0, #16",
"uxth w22, w22",
"eor w23, w20, w21",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w22, w21",
"cset x24, lo",
"ubfx w24, w22, #16, #1",
"orr w23, w23, w24, lsl #29",
"eor w21, w20, w21",
"eor w20, w22, w20",
@ -3195,7 +3181,7 @@
]
},
"xadd [rax], bx": {
"ExpectedInstructionCount": 20,
"ExpectedInstructionCount": 18,
"Optimal": "No",
"Comment": "0x0f 0xc1",
"ExpectedArm64ASM": [
@ -3203,15 +3189,13 @@
"ldaddalh w20, w21, [x4]",
"bfxil x7, x21, #0, #16",
"add w22, w21, w20",
"uxth w22, w22",
"eor w23, w21, w20",
"strb w23, [x28, #708]",
"strb w22, [x28, #706]",
"lsl w23, w22, #16",
"tst w23, w23",
"mrs x23, nzcv",
"cmp w22, w20",
"cset x24, lo",
"ubfx w24, w22, #16, #1",
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",