Merge pull request #3147 from alyssarosenzweig/opt/0924

More opts to the dispatcher + 1 to the JIT
This commit is contained in:
Ryan Houdek 2023-09-24 17:01:37 -07:00 committed by GitHub
commit bee97309f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 233 additions and 400 deletions

View File

@ -83,47 +83,32 @@ DEF_OP(Break) {
DEF_OP(GetRoundingMode) {
auto Dst = GetReg(Node);
mrs(Dst, ARMEmitter::SystemRegister::FPCR);
lsr(ARMEmitter::Size::i64Bit, Dst, Dst, 22);
ubfx(ARMEmitter::Size::i64Bit, Dst, Dst, 22, 3);
// FTZ is already in the correct location
// Rounding mode is different
and_(ARMEmitter::Size::i64Bit, TMP1, Dst, 0b11);
//
// Need to remap rounding mode from order nearest, pos inf, neg inf, toward
// zero. Just swapping 01 and 10. That's a bitfield reverse. Round mode is in
// bottom two bits. After reversing as a 32-bit operation, it'll be in [31:30]
// and ripe for reinsertion back at 0.
static_assert(IR::ROUND_MODE_NEAREST == 0);
static_assert(IR::ROUND_MODE_NEGATIVE_INFINITY == 1);
static_assert(IR::ROUND_MODE_POSITIVE_INFINITY == 2);
static_assert(IR::ROUND_MODE_TOWARDS_ZERO == 3);
cmp(ARMEmitter::Size::i64Bit, TMP1, 1);
LoadConstant(ARMEmitter::Size::i64Bit, TMP3, IR::ROUND_MODE_POSITIVE_INFINITY);
csel(ARMEmitter::Size::i64Bit, TMP2, TMP3, ARMEmitter::Reg::zr, ARMEmitter::Condition::CC_EQ);
cmp(ARMEmitter::Size::i64Bit, TMP1, 2);
LoadConstant(ARMEmitter::Size::i64Bit, TMP3, IR::ROUND_MODE_NEGATIVE_INFINITY);
csel(ARMEmitter::Size::i64Bit, TMP2, TMP3, TMP2, ARMEmitter::Condition::CC_EQ);
cmp(ARMEmitter::Size::i64Bit, TMP1, 3);
LoadConstant(ARMEmitter::Size::i64Bit, TMP3, IR::ROUND_MODE_TOWARDS_ZERO);
csel(ARMEmitter::Size::i64Bit, TMP2, TMP3, TMP2, ARMEmitter::Condition::CC_EQ);
orr(ARMEmitter::Size::i64Bit, Dst, Dst, TMP2.R());
bfi(ARMEmitter::Size::i64Bit, Dst, TMP2, 0, 2);
rbit(ARMEmitter::Size::i32Bit, TMP1, Dst);
bfi(ARMEmitter::Size::i64Bit, Dst, TMP1, 30, 2);
}
DEF_OP(SetRoundingMode) {
auto Op = IROp->C<IR::IROp_SetRoundingMode>();
auto Src = GetReg(Op->RoundMode.ID());
// Setup the rounding flags correctly
and_(ARMEmitter::Size::i64Bit, TMP1, Src, 0b11);
cmp(ARMEmitter::Size::i64Bit, TMP1, IR::ROUND_MODE_POSITIVE_INFINITY);
LoadConstant(ARMEmitter::Size::i64Bit, TMP3, 1);
csel(ARMEmitter::Size::i64Bit, TMP2, TMP3, ARMEmitter::Reg::zr, ARMEmitter::Condition::CC_EQ);
cmp(ARMEmitter::Size::i64Bit, TMP1, IR::ROUND_MODE_NEGATIVE_INFINITY);
LoadConstant(ARMEmitter::Size::i64Bit, TMP3, 2);
csel(ARMEmitter::Size::i64Bit, TMP2, TMP3, TMP2, ARMEmitter::Condition::CC_EQ);
cmp(ARMEmitter::Size::i64Bit, TMP1, IR::ROUND_MODE_TOWARDS_ZERO);
LoadConstant(ARMEmitter::Size::i64Bit, TMP3, 3);
csel(ARMEmitter::Size::i64Bit, TMP2, TMP3, TMP2, ARMEmitter::Condition::CC_EQ);
// As above, setup the rounding flags in [31:30]
rbit(ARMEmitter::Size::i32Bit, TMP2, Src);
// and extract
lsr(ARMEmitter::Size::i32Bit, TMP2, TMP2, 30);
mrs(TMP1, ARMEmitter::SystemRegister::FPCR);

View File

@ -1359,7 +1359,7 @@ private:
void CalculatePF(OrderedNode *Res, OrderedNode *condition = nullptr);
void CalculateAF(OpSize OpSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2);
void CalculateOF_Add(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2);
void CalculateOF(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, bool Sub);
void CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF);
void CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF);
void CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF = true);

View File

@ -200,13 +200,39 @@ OrderedNode *OpDispatchBuilder::GetPackedRFLAG(uint32_t FlagsMask) {
return Original;
}
void OpDispatchBuilder::CalculateOF_Add(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) {
void OpDispatchBuilder::CalculateOF(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, bool Sub) {
auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
auto XorOp1 = _Xor(OpSize, Src1, Src2);
auto XorOp2 = _Xor(OpSize, Res, Src1);
OrderedNode *AndOp1 = _Andn(OpSize, XorOp2, XorOp1);
AndOp1 = _Bfe(OpSize, 1, SrcSize * 8 - 1, AndOp1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_LOC>(AndOp1);
uint64_t SignBit = (SrcSize * 8) - 1;
OrderedNode *Anded = nullptr;
// For add, OF is set iff the sources have the same sign but the destination
// sign differs. If we know a source sign, we can simplify the expression: if
// source 2 is known to be positive, we set OF if source 1 is positive and
// the destination is negative. Similarly if source 2 is known negative.
//
// For sub, OF is set iff the sources have differing signs and the destination
// sign matches the second source. If source 2 is known positive, set iff
// source 1 is negative and the destination is positive.
uint64_t Const;
if (IsValueConstant(WrapNode(Src2), &Const)) {
bool Negative = (Const & (1ull << SignBit)) != 0;
if (Negative ^ Sub)
Anded = _Andn(OpSize, Src1, Res);
else
Anded = _Andn(OpSize, Res, Src1);
} else {
auto XorOp1 = _Xor(OpSize, Src1, Src2);
auto XorOp2 = _Xor(OpSize, Res, Src1);
if (Sub)
Anded = _And(OpSize, XorOp2, XorOp1);
else
Anded = _Andn(OpSize, XorOp2, XorOp1);
}
auto OF = _Bfe(OpSize, 1, SrcSize * 8 - 1, Anded);
SetRFLAG<FEXCore::X86State::RFLAG_OF_LOC>(OF);
}
OrderedNode *OpDispatchBuilder::LoadPFRaw() {
@ -504,7 +530,7 @@ void OpDispatchBuilder::CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Res, Or
}
// Signed
CalculateOF_Add(SrcSize, Res, Src1, Src2);
CalculateOF(SrcSize, Res, Src1, Src2, false);
}
void OpDispatchBuilder::CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) {
@ -527,15 +553,8 @@ void OpDispatchBuilder::CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, Or
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(SelectCF);
}
// OF
// Signed
{
auto XorOp1 = _Xor(OpSize, Src1, Src2);
auto XorOp2 = _Xor(OpSize, Res, Src1);
OrderedNode *AndOp1 = _And(OpSize, XorOp1, XorOp2);
AndOp1 = _Bfe(OpSize, 1, SrcSize * 8 - 1, AndOp1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_LOC>(AndOp1);
}
CalculateOF(SrcSize, Res, Src1, Src2, true);
}
void OpDispatchBuilder::CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF) {
@ -565,16 +584,7 @@ void OpDispatchBuilder::CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Res, Or
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(SelectOp);
}
// OF
{
auto XorOp1 = _Xor(OpSize, Src1, Src2);
auto XorOp2 = _Xor(OpSize, Res, Src1);
OrderedNode *FinalAnd = _And(OpSize, XorOp1, XorOp2);
FinalAnd = _Bfe(OpSize, 1, SrcSize * 8 - 1, FinalAnd);
SetRFLAG<FEXCore::X86State::RFLAG_OF_LOC>(FinalAnd);
}
CalculateOF(SrcSize, Res, Src1, Src2, true);
}
// We stomped over CF while calculating flags, restore it.
@ -607,7 +617,7 @@ void OpDispatchBuilder::CalculateFlags_ADD(uint8_t SrcSize, OrderedNode *Res, Or
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(SelectOp);
}
CalculateOF_Add(SrcSize, Res, Src1, Src2);
CalculateOF(SrcSize, Res, Src1, Src2, false);
}
// We stomped over CF while calculating flags, restore it.
@ -1082,9 +1092,7 @@ void OpDispatchBuilder::CalculateFlags_BLSI(uint8_t SrcSize, OrderedNode *Src) {
// CF
{
auto CFOp = _Select(IR::COND_EQ,
Src, Zero,
Zero, One);
auto CFOp = _Select(IR::COND_NEQ, Src, Zero, One, Zero);
SetRFLAG<X86State::RFLAG_CF_LOC>(CFOp);
}
}
@ -1105,9 +1113,7 @@ void OpDispatchBuilder::CalculateFlags_BLSMSK(OrderedNode *Src) {
_InvalidateFlags((1UL << X86State::RFLAG_PF_LOC) |
(1UL << X86State::RFLAG_AF_LOC));
auto CFOp = _Select(IR::COND_EQ,
Src, Zero,
Zero, One);
auto CFOp = _Select(IR::COND_NEQ, Src, Zero, One, Zero);
SetRFLAG<X86State::RFLAG_CF_LOC>(CFOp);
}
@ -1124,9 +1130,7 @@ void OpDispatchBuilder::CalculateFlags_BLSR(uint8_t SrcSize, OrderedNode *Result
// CF
{
auto CFOp = _Select(IR::COND_EQ,
Src, Zero,
Zero, One);
auto CFOp = _Select(IR::COND_NEQ, Src, Zero, One, Zero);
SetRFLAG<X86State::RFLAG_CF_LOC>(CFOp);
}
}

View File

@ -191,7 +191,7 @@
]
},
"add al, 1": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "0x04",
"ExpectedArm64ASM": [
@ -207,16 +207,14 @@
"cmp w21, #0x1 (1)",
"cset x23, lo",
"orr w22, w22, w23, lsl #29",
"eor w23, w20, #0x1",
"eor w20, w21, w20",
"bic w20, w20, w23",
"bic w20, w21, w20",
"ubfx w20, w20, #7, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
]
},
"add ax, 1": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "0x05",
"ExpectedArm64ASM": [
@ -232,9 +230,7 @@
"cmp w21, #0x1 (1)",
"cset x23, lo",
"orr w22, w22, w23, lsl #29",
"eor w23, w20, #0x1",
"eor w20, w21, w20",
"bic w20, w20, w23",
"bic w20, w21, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
@ -722,7 +718,7 @@
]
},
"adc al, 1": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Optimal": "No",
"Comment": "0x14",
"ExpectedArm64ASM": [
@ -746,16 +742,14 @@
"cmp x21, #0x1 (1)",
"csel x21, x25, x24, eq",
"orr w21, w23, w21, lsl #29",
"eor w23, w22, #0x1",
"eor w20, w20, w22",
"bic w20, w20, w23",
"bic w20, w20, w22",
"ubfx w20, w20, #7, #1",
"orr w20, w21, w20, lsl #28",
"str w20, [x28, #728]"
]
},
"adc ax, 1": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Optimal": "No",
"Comment": "0x15",
"ExpectedArm64ASM": [
@ -779,16 +773,14 @@
"cmp x21, #0x1 (1)",
"csel x21, x25, x24, eq",
"orr w21, w23, w21, lsl #29",
"eor w23, w22, #0x1",
"eor w20, w20, w22",
"bic w20, w20, w23",
"bic w20, w20, w22",
"ubfx w20, w20, #15, #1",
"orr w20, w21, w20, lsl #28",
"str w20, [x28, #728]"
]
},
"adc eax, 1": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 21,
"Optimal": "No",
"Comment": "0x15",
"ExpectedArm64ASM": [
@ -809,16 +801,14 @@
"cmp x21, #0x1 (1)",
"csel x21, x24, x23, eq",
"orr w20, w20, w21, lsl #29",
"eor w21, w22, #0x1",
"eor w22, w4, w22",
"bic w21, w22, w21",
"bic w21, w4, w22",
"lsr w21, w21, #31",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
]
},
"adc rax, 1": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 21,
"Optimal": "No",
"Comment": "0x15",
"ExpectedArm64ASM": [
@ -839,9 +829,7 @@
"cmp x21, #0x1 (1)",
"csel x21, x24, x23, eq",
"orr w20, w20, w21, lsl #29",
"eor x21, x22, #0x1",
"eor x22, x4, x22",
"bic x21, x22, x21",
"bic x21, x4, x22",
"lsr x21, x21, #63",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
@ -875,7 +863,7 @@
"orr w21, w24, w21, lsl #29",
"eor w20, w23, w20",
"eor w22, w22, w23",
"and w20, w20, w22",
"and w20, w22, w20",
"ubfx w20, w20, #7, #1",
"orr w20, w21, w20, lsl #28",
"str w20, [x28, #728]"
@ -909,7 +897,7 @@
"orr w21, w24, w21, lsl #29",
"eor w20, w23, w20",
"eor w22, w22, w23",
"and w20, w20, w22",
"and w20, w22, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w21, w20, lsl #28",
"str w20, [x28, #728]"
@ -940,7 +928,7 @@
"orr w21, w22, w21, lsl #29",
"eor w20, w23, w20",
"eor w22, w7, w23",
"and w20, w20, w22",
"and w20, w22, w20",
"lsr w20, w20, #31",
"orr w20, w21, w20, lsl #28",
"str w20, [x28, #728]"
@ -970,7 +958,7 @@
"orr w20, w21, w20, lsl #29",
"eor x21, x22, x5",
"eor x22, x7, x22",
"and x21, x21, x22",
"and x21, x22, x21",
"lsr x21, x21, #63",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
@ -1007,7 +995,7 @@
"orr w21, w24, w21, lsl #29",
"eor w20, w23, w20",
"eor w22, w22, w23",
"and w20, w20, w22",
"and w20, w22, w20",
"ubfx w20, w20, #7, #1",
"orr w20, w21, w20, lsl #28",
"str w20, [x28, #728]"
@ -1044,7 +1032,7 @@
"orr w21, w24, w21, lsl #29",
"eor w20, w23, w20",
"eor w22, w22, w23",
"and w20, w20, w22",
"and w20, w22, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w21, w20, lsl #28",
"str w20, [x28, #728]"
@ -1078,7 +1066,7 @@
"orr w21, w22, w21, lsl #29",
"eor w20, w23, w20",
"eor w22, w5, w23",
"and w20, w20, w22",
"and w20, w22, w20",
"lsr w20, w20, #31",
"orr w20, w21, w20, lsl #28",
"str w20, [x28, #728]"
@ -1111,14 +1099,14 @@
"orr w20, w21, w20, lsl #29",
"eor x21, x22, x7",
"eor x22, x5, x22",
"and x21, x21, x22",
"and x21, x22, x21",
"lsr x21, x21, #63",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
]
},
"sbb al, 1": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Optimal": "No",
"Comment": "0x1C",
"ExpectedArm64ASM": [
@ -1142,16 +1130,14 @@
"cmp x21, #0x1 (1)",
"csel x21, x25, x24, eq",
"orr w21, w23, w21, lsl #29",
"eor w23, w22, #0x1",
"eor w20, w20, w22",
"and w20, w23, w20",
"bic w20, w22, w20",
"ubfx w20, w20, #7, #1",
"orr w20, w21, w20, lsl #28",
"str w20, [x28, #728]"
]
},
"sbb ax, 1": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Optimal": "No",
"Comment": "0x1D",
"ExpectedArm64ASM": [
@ -1175,16 +1161,14 @@
"cmp x21, #0x1 (1)",
"csel x21, x25, x24, eq",
"orr w21, w23, w21, lsl #29",
"eor w23, w22, #0x1",
"eor w20, w20, w22",
"and w20, w23, w20",
"bic w20, w22, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w21, w20, lsl #28",
"str w20, [x28, #728]"
]
},
"sbb eax, 1": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 21,
"Optimal": "No",
"Comment": "0x1D",
"ExpectedArm64ASM": [
@ -1205,16 +1189,14 @@
"cmp x21, #0x1 (1)",
"csel x21, x24, x23, eq",
"orr w20, w20, w21, lsl #29",
"eor w21, w22, #0x1",
"eor w22, w4, w22",
"and w21, w21, w22",
"bic w21, w22, w4",
"lsr w21, w21, #31",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
]
},
"sbb rax, 1": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 21,
"Optimal": "No",
"Comment": "0x1D",
"ExpectedArm64ASM": [
@ -1235,9 +1217,7 @@
"cmp x21, #0x1 (1)",
"csel x21, x24, x23, eq",
"orr w20, w20, w21, lsl #29",
"eor x21, x22, #0x1",
"eor x22, x4, x22",
"and x21, x21, x22",
"bic x21, x22, x4",
"lsr x21, x21, #63",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
@ -1447,7 +1427,7 @@
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
"and w20, w20, w21",
"and w20, w21, w20",
"ubfx w20, w20, #7, #1",
"orr w20, w23, w20, lsl #28",
"str w20, [x28, #728]"
@ -1474,7 +1454,7 @@
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
"and w20, w20, w21",
"and w20, w21, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w23, w20, lsl #28",
"str w20, [x28, #728]"
@ -1537,7 +1517,7 @@
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
"and w20, w20, w21",
"and w20, w21, w20",
"ubfx w20, w20, #7, #1",
"orr w20, w23, w20, lsl #28",
"str w20, [x28, #728]"
@ -1567,7 +1547,7 @@
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
"and w20, w20, w21",
"and w20, w21, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w23, w20, lsl #28",
"str w20, [x28, #728]"
@ -1613,7 +1593,7 @@
]
},
"sub al, 1": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "0x2C",
"ExpectedArm64ASM": [
@ -1629,16 +1609,14 @@
"cmp w20, #0x1 (1)",
"cset x23, lo",
"orr w22, w22, w23, lsl #29",
"eor w23, w20, #0x1",
"eor w20, w21, w20",
"and w20, w23, w20",
"bic w20, w20, w21",
"ubfx w20, w20, #7, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
]
},
"sub ax, 1": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "0x2D",
"ExpectedArm64ASM": [
@ -1654,9 +1632,7 @@
"cmp w20, #0x1 (1)",
"cset x23, lo",
"orr w22, w22, w23, lsl #29",
"eor w23, w20, #0x1",
"eor w20, w21, w20",
"and w20, w23, w20",
"bic w20, w20, w21",
"ubfx w20, w20, #15, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
@ -1895,7 +1871,7 @@
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
"and w20, w20, w21",
"and w20, w21, w20",
"ubfx w20, w20, #7, #1",
"orr w20, w23, w20, lsl #28",
"str w20, [x28, #728]"
@ -1921,7 +1897,7 @@
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
"and w20, w20, w21",
"and w20, w21, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w23, w20, lsl #28",
"str w20, [x28, #728]"
@ -1982,7 +1958,7 @@
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
"and w20, w20, w21",
"and w20, w21, w20",
"ubfx w20, w20, #7, #1",
"orr w20, w23, w20, lsl #28",
"str w20, [x28, #728]"
@ -2011,7 +1987,7 @@
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
"and w20, w20, w21",
"and w20, w21, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w23, w20, lsl #28",
"str w20, [x28, #728]"
@ -2056,7 +2032,7 @@
]
},
"cmp al, 1": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 15,
"Optimal": "No",
"Comment": "0x3C",
"ExpectedArm64ASM": [
@ -2071,16 +2047,14 @@
"cmp w20, #0x1 (1)",
"cset x23, lo",
"orr w22, w22, w23, lsl #29",
"eor w23, w20, #0x1",
"eor w20, w21, w20",
"and w20, w23, w20",
"bic w20, w20, w21",
"ubfx w20, w20, #7, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
]
},
"cmp ax, 1": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 15,
"Optimal": "No",
"Comment": "0x3D",
"ExpectedArm64ASM": [
@ -2095,9 +2069,7 @@
"cmp w20, #0x1 (1)",
"cset x23, lo",
"orr w22, w22, w23, lsl #29",
"eor w23, w20, #0x1",
"eor w20, w21, w20",
"and w20, w23, w20",
"bic w20, w20, w21",
"ubfx w20, w20, #15, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
@ -3482,7 +3454,7 @@
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
"and w20, w20, w21",
"and w20, w21, w20",
"ubfx w20, w20, #7, #1",
"orr w20, w23, w20, lsl #28",
"str w20, [x28, #728]"
@ -3518,7 +3490,7 @@
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
"and w20, w20, w21",
"and w20, w21, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w23, w20, lsl #28",
"str w20, [x28, #728]"
@ -3604,7 +3576,7 @@
"orr w24, w24, w25, lsl #29",
"eor w21, w22, w21",
"eor w22, w23, w22",
"and w21, w21, w22",
"and w21, w22, w21",
"ubfx w21, w21, #7, #1",
"orr w21, w24, w21, lsl #28",
"str w21, [x28, #728]",
@ -3641,7 +3613,7 @@
"orr w24, w24, w25, lsl #29",
"eor w21, w22, w21",
"eor w22, w23, w22",
"and w21, w21, w22",
"and w21, w22, w21",
"ubfx w21, w21, #15, #1",
"orr w21, w24, w21, lsl #28",
"str w21, [x28, #728]",
@ -3734,7 +3706,7 @@
"orr w24, w24, w25, lsl #29",
"eor w21, w22, w21",
"eor w22, w23, w22",
"and w21, w21, w22",
"and w21, w22, w21",
"ubfx w21, w21, #7, #1",
"orr w21, w24, w21, lsl #28",
"str w21, [x28, #728]",
@ -3771,7 +3743,7 @@
"orr w24, w24, w25, lsl #29",
"eor w21, w22, w21",
"eor w22, w23, w22",
"and w21, w21, w22",
"and w21, w22, w21",
"ubfx w21, w21, #15, #1",
"orr w21, w24, w21, lsl #28",
"str w21, [x28, #728]",
@ -4203,7 +4175,7 @@
"orr w23, w23, w24, lsl #29",
"eor w21, w20, w21",
"eor w20, w22, w20",
"and w20, w21, w20",
"and w20, w20, w21",
"ubfx w20, w20, #7, #1",
"orr w20, w23, w20, lsl #28",
"str w20, [x28, #728]"
@ -4235,7 +4207,7 @@
"orr w23, w23, w24, lsl #29",
"eor w21, w20, w21",
"eor w20, w22, w20",
"and w20, w21, w20",
"and w20, w20, w21",
"ubfx w20, w20, #15, #1",
"orr w20, w23, w20, lsl #28",
"str w20, [x28, #728]"
@ -4312,7 +4284,7 @@
"orr w24, w24, w25, lsl #29",
"eor w22, w21, w22",
"eor w21, w23, w21",
"and w21, w22, w21",
"and w21, w21, w22",
"ubfx w21, w21, #7, #1",
"orr w21, w24, w21, lsl #28",
"str w21, [x28, #728]",
@ -4348,7 +4320,7 @@
"orr w24, w24, w25, lsl #29",
"eor w22, w21, w22",
"eor w21, w23, w21",
"and w21, w22, w21",
"and w21, w21, w22",
"ubfx w21, w21, #15, #1",
"orr w21, w24, w21, lsl #28",
"str w21, [x28, #728]",
@ -4437,7 +4409,7 @@
"orr w24, w24, w25, lsl #29",
"eor w22, w21, w22",
"eor w21, w23, w21",
"and w21, w22, w21",
"and w21, w21, w22",
"ubfx w21, w21, #7, #1",
"orr w21, w24, w21, lsl #28",
"str w21, [x28, #728]",
@ -4473,7 +4445,7 @@
"orr w24, w24, w25, lsl #29",
"eor w22, w21, w22",
"eor w21, w23, w21",
"and w21, w22, w21",
"and w21, w21, w22",
"ubfx w21, w21, #15, #1",
"orr w21, w24, w21, lsl #28",
"str w21, [x28, #728]",

View File

@ -13,7 +13,7 @@
],
"Instructions": {
"add al, 1": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "GROUP1 0x80 /0",
"ExpectedArm64ASM": [
@ -29,9 +29,7 @@
"cmp w21, #0x1 (1)",
"cset x23, lo",
"orr w22, w22, w23, lsl #29",
"eor w23, w20, #0x1",
"eor w20, w21, w20",
"bic w20, w20, w23",
"bic w20, w21, w20",
"ubfx w20, w20, #7, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
@ -53,7 +51,7 @@
]
},
"adc al, 1": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Optimal": "No",
"Comment": "GROUP1 0x80 /2",
"ExpectedArm64ASM": [
@ -77,16 +75,14 @@
"cmp x21, #0x1 (1)",
"csel x21, x25, x24, eq",
"orr w21, w23, w21, lsl #29",
"eor w23, w22, #0x1",
"eor w20, w20, w22",
"bic w20, w20, w23",
"bic w20, w20, w22",
"ubfx w20, w20, #7, #1",
"orr w20, w21, w20, lsl #28",
"str w20, [x28, #728]"
]
},
"sbb al, 1": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Optimal": "No",
"Comment": "GROUP1 0x80 /3",
"ExpectedArm64ASM": [
@ -110,9 +106,7 @@
"cmp x21, #0x1 (1)",
"csel x21, x25, x24, eq",
"orr w21, w23, w21, lsl #29",
"eor w23, w22, #0x1",
"eor w20, w20, w22",
"and w20, w23, w20",
"bic w20, w22, w20",
"ubfx w20, w20, #7, #1",
"orr w20, w21, w20, lsl #28",
"str w20, [x28, #728]"
@ -134,7 +128,7 @@
]
},
"sub al, 1": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "GROUP1 0x80 /5",
"ExpectedArm64ASM": [
@ -150,9 +144,7 @@
"cmp w20, #0x1 (1)",
"cset x23, lo",
"orr w22, w22, w23, lsl #29",
"eor w23, w20, #0x1",
"eor w20, w21, w20",
"and w20, w23, w20",
"bic w20, w20, w21",
"ubfx w20, w20, #7, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
@ -174,7 +166,7 @@
]
},
"cmp al, 1": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 15,
"Optimal": "No",
"Comment": "GROUP1 0x80 /7",
"ExpectedArm64ASM": [
@ -189,16 +181,14 @@
"cmp w20, #0x1 (1)",
"cset x23, lo",
"orr w22, w22, w23, lsl #29",
"eor w23, w20, #0x1",
"eor w20, w21, w20",
"and w20, w23, w20",
"bic w20, w20, w21",
"ubfx w20, w20, #7, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
]
},
"add ax, 256": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "GROUP1 0x81 /0",
"ExpectedArm64ASM": [
@ -214,9 +204,7 @@
"cmp w21, #0x100 (256)",
"cset x23, lo",
"orr w22, w22, w23, lsl #29",
"eor w23, w20, #0x100",
"eor w20, w21, w20",
"bic w20, w20, w23",
"bic w20, w21, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
@ -276,7 +264,7 @@
]
},
"adc eax, 256": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 21,
"Optimal": "No",
"Comment": "GROUP1 0x81 /2",
"ExpectedArm64ASM": [
@ -297,16 +285,14 @@
"cmp x21, #0x1 (1)",
"csel x21, x24, x23, eq",
"orr w20, w20, w21, lsl #29",
"eor w21, w22, #0x100",
"eor w22, w4, w22",
"bic w21, w22, w21",
"bic w21, w4, w22",
"lsr w21, w21, #31",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
]
},
"adc rax, 256": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 21,
"Optimal": "No",
"Comment": "GROUP1 0x81 /2",
"ExpectedArm64ASM": [
@ -327,16 +313,14 @@
"cmp x21, #0x1 (1)",
"csel x21, x24, x23, eq",
"orr w20, w20, w21, lsl #29",
"eor x21, x22, #0x100",
"eor x22, x4, x22",
"bic x21, x22, x21",
"bic x21, x4, x22",
"lsr x21, x21, #63",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
]
},
"sbb eax, 256": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 21,
"Optimal": "No",
"Comment": "GROUP1 0x81 /3",
"ExpectedArm64ASM": [
@ -357,16 +341,14 @@
"cmp x21, #0x1 (1)",
"csel x21, x24, x23, eq",
"orr w20, w20, w21, lsl #29",
"eor w21, w22, #0x100",
"eor w22, w4, w22",
"and w21, w21, w22",
"bic w21, w22, w4",
"lsr w21, w21, #31",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
]
},
"sbb rax, 256": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 21,
"Optimal": "No",
"Comment": "GROUP1 0x81 /3",
"ExpectedArm64ASM": [
@ -387,9 +369,7 @@
"cmp x21, #0x1 (1)",
"csel x21, x24, x23, eq",
"orr w20, w20, w21, lsl #29",
"eor x21, x22, #0x100",
"eor x22, x4, x22",
"and x21, x21, x22",
"bic x21, x22, x4",
"lsr x21, x21, #63",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
@ -505,7 +485,7 @@
]
},
"add ax, 1": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "GROUP1 0x83 /0",
"ExpectedArm64ASM": [
@ -521,9 +501,7 @@
"cmp w21, #0x1 (1)",
"cset x23, lo",
"orr w22, w22, w23, lsl #29",
"eor w23, w20, #0x1",
"eor w20, w21, w20",
"bic w20, w20, w23",
"bic w20, w21, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w22, w20, lsl #28",
"str w20, [x28, #728]"
@ -583,7 +561,7 @@
]
},
"adc eax, 1": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 21,
"Optimal": "No",
"Comment": "GROUP1 0x83 /2",
"ExpectedArm64ASM": [
@ -604,16 +582,14 @@
"cmp x21, #0x1 (1)",
"csel x21, x24, x23, eq",
"orr w20, w20, w21, lsl #29",
"eor w21, w22, #0x1",
"eor w22, w4, w22",
"bic w21, w22, w21",
"bic w21, w4, w22",
"lsr w21, w21, #31",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
]
},
"adc rax, 1": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 21,
"Optimal": "No",
"Comment": "GROUP1 0x83 /2",
"ExpectedArm64ASM": [
@ -634,16 +610,14 @@
"cmp x21, #0x1 (1)",
"csel x21, x24, x23, eq",
"orr w20, w20, w21, lsl #29",
"eor x21, x22, #0x1",
"eor x22, x4, x22",
"bic x21, x22, x21",
"bic x21, x4, x22",
"lsr x21, x21, #63",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
]
},
"sbb eax, 1": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 21,
"Optimal": "No",
"Comment": "GROUP1 0x83 /3",
"ExpectedArm64ASM": [
@ -664,16 +638,14 @@
"cmp x21, #0x1 (1)",
"csel x21, x24, x23, eq",
"orr w20, w20, w21, lsl #29",
"eor w21, w22, #0x1",
"eor w22, w4, w22",
"and w21, w21, w22",
"bic w21, w22, w4",
"lsr w21, w21, #31",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
]
},
"sbb rax, 1": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 21,
"Optimal": "No",
"Comment": "GROUP1 0x83 /3",
"ExpectedArm64ASM": [
@ -694,9 +666,7 @@
"cmp x21, #0x1 (1)",
"csel x21, x24, x23, eq",
"orr w20, w20, w21, lsl #29",
"eor x21, x22, #0x1",
"eor x22, x4, x22",
"and x21, x21, x22",
"bic x21, x22, x4",
"lsr x21, x21, #63",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
@ -2784,7 +2754,7 @@
"cmp x20, x21",
"cset x20, lo",
"orr w20, w23, w20, lsl #29",
"and w21, w21, w22",
"and w21, w22, w21",
"ubfx w21, w21, #7, #1",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
@ -2950,7 +2920,7 @@
"cmp x20, x21",
"cset x20, lo",
"orr w20, w23, w20, lsl #29",
"and w21, w21, w22",
"and w21, w22, w21",
"ubfx w21, w21, #15, #1",
"orr w20, w20, w21, lsl #28",
"str w20, [x28, #728]"
@ -3240,7 +3210,7 @@
]
},
"inc al": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "GROUP3 0xfe /0",
"ExpectedArm64ASM": [
@ -3255,9 +3225,7 @@
"lsl w23, w21, #24",
"tst w23, w23",
"mrs x23, nzcv",
"eor w24, w20, #0x1",
"eor w20, w21, w20",
"bic w20, w20, w24",
"bic w20, w21, w20",
"ubfx w20, w20, #7, #1",
"orr w20, w23, w20, lsl #28",
"orr w20, w20, w22, lsl #29",
@ -3265,7 +3233,7 @@
]
},
"dec al": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "GROUP3 0xfe /1",
"ExpectedArm64ASM": [
@ -3280,9 +3248,7 @@
"lsl w23, w21, #24",
"tst w23, w23",
"mrs x23, nzcv",
"eor w24, w20, #0x1",
"eor w20, w21, w20",
"and w20, w24, w20",
"bic w20, w20, w21",
"ubfx w20, w20, #7, #1",
"orr w20, w23, w20, lsl #28",
"orr w20, w20, w22, lsl #29",
@ -3290,7 +3256,7 @@
]
},
"inc ax": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "GROUP4 0xfe /0",
"ExpectedArm64ASM": [
@ -3305,9 +3271,7 @@
"lsl w23, w21, #16",
"tst w23, w23",
"mrs x23, nzcv",
"eor w24, w20, #0x1",
"eor w20, w21, w20",
"bic w20, w20, w24",
"bic w20, w21, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w23, w20, lsl #28",
"orr w20, w20, w22, lsl #29",
@ -3349,7 +3313,7 @@
]
},
"dec ax": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "GROUP4 0xfe /1",
"ExpectedArm64ASM": [
@ -3364,9 +3328,7 @@
"lsl w23, w21, #16",
"tst w23, w23",
"mrs x23, nzcv",
"eor w24, w20, #0x1",
"eor w20, w21, w20",
"and w20, w24, w20",
"bic w20, w20, w21",
"ubfx w20, w20, #15, #1",
"orr w20, w23, w20, lsl #28",
"orr w20, w20, w22, lsl #29",

View File

@ -286,7 +286,7 @@
]
},
"inc ax": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "0x40",
"ExpectedArm64ASM": [
@ -301,9 +301,7 @@
"lsl w23, w21, #16",
"tst w23, w23",
"mrs x23, nzcv",
"eor w24, w20, #0x1",
"eor w20, w21, w20",
"bic w20, w20, w24",
"bic w20, w21, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w23, w20, lsl #28",
"orr w20, w20, w22, lsl #29",
@ -328,7 +326,7 @@
]
},
"dec ax": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Optimal": "No",
"Comment": "0x48",
"ExpectedArm64ASM": [
@ -343,9 +341,7 @@
"lsl w23, w21, #16",
"tst w23, w23",
"mrs x23, nzcv",
"eor w24, w20, #0x1",
"eor w20, w21, w20",
"and w20, w24, w20",
"bic w20, w20, w21",
"ubfx w20, w20, #15, #1",
"orr w20, w23, w20, lsl #28",
"orr w20, w20, w22, lsl #29",

View File

@ -2318,7 +2318,7 @@
"orr w21, w21, w24, lsl #29",
"eor w23, w22, w23",
"eor w20, w20, w22",
"and w20, w23, w20",
"and w20, w20, w23",
"ubfx w20, w20, #7, #1",
"orr w20, w21, w20, lsl #28",
"str w20, [x28, #728]"
@ -2348,7 +2348,7 @@
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
"and w20, w20, w21",
"and w20, w21, w20",
"ubfx w20, w20, #7, #1",
"orr w20, w23, w20, lsl #28",
"str w20, [x28, #728]"
@ -2381,7 +2381,7 @@
"orr w21, w21, w24, lsl #29",
"eor w23, w22, w23",
"eor w20, w20, w22",
"and w20, w23, w20",
"and w20, w20, w23",
"ubfx w20, w20, #15, #1",
"orr w20, w21, w20, lsl #28",
"str w20, [x28, #728]"
@ -2411,7 +2411,7 @@
"orr w23, w23, w24, lsl #29",
"eor w20, w21, w20",
"eor w21, w22, w21",
"and w20, w20, w21",
"and w20, w21, w20",
"ubfx w20, w20, #15, #1",
"orr w20, w23, w20, lsl #28",
"str w20, [x28, #728]"

View File

@ -1498,7 +1498,7 @@
]
},
"fxsave [rax]": {
"ExpectedInstructionCount": 68,
"ExpectedInstructionCount": 58,
"Optimal": "No",
"Comment": "GROUP15 0x0F 0xAE /0",
"ExpectedArm64ASM": [
@ -1552,19 +1552,9 @@
"str q31, [x4, #400]",
"mov w20, #0x1f80",
"mrs x21, fpcr",
"lsr x21, x21, #22",
"and x0, x21, #0x3",
"cmp x0, #0x1 (1)",
"mov w2, #0x2",
"csel x1, x2, xzr, eq",
"cmp x0, #0x2 (2)",
"mov w2, #0x1",
"csel x1, x2, x1, eq",
"cmp x0, #0x3 (3)",
"mov w2, #0x3",
"csel x1, x2, x1, eq",
"orr x21, x21, x1",
"bfxil x21, x1, #0, #2",
"ubfx x21, x21, #22, #3",
"rbit w0, w21",
"bfi x21, x0, #30, #2",
"bfi w20, w21, #13, #3",
"add x21, x4, #0x18 (24)",
"str w20, [x4, #24]",
@ -1589,7 +1579,7 @@
]
},
"fxrstor [rax]": {
"ExpectedInstructionCount": 64,
"ExpectedInstructionCount": 56,
"Optimal": "No",
"Comment": "GROUP15 0x0F 0xAE /1",
"ExpectedArm64ASM": [
@ -1642,16 +1632,8 @@
"ldr q31, [x4, #400]",
"ldr w20, [x4, #24]",
"ubfx w20, w20, #13, #3",
"and x0, x20, #0x3",
"cmp x0, #0x2 (2)",
"mov w2, #0x1",
"csel x1, x2, xzr, eq",
"cmp x0, #0x1 (1)",
"mov w2, #0x2",
"csel x1, x2, x1, eq",
"cmp x0, #0x3 (3)",
"mov w2, #0x3",
"csel x1, x2, x1, eq",
"rbit w1, w20",
"lsr w1, w1, #30",
"mrs x0, fpcr",
"bfi x0, x1, #22, #2",
"lsr x1, x20, #2",
@ -1676,22 +1658,14 @@
]
},
"ldmxcsr [rax]": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 9,
"Optimal": "No",
"Comment": "GROUP15 0x0F 0xAE /2",
"ExpectedArm64ASM": [
"ldr w20, [x4]",
"ubfx w20, w20, #13, #3",
"and x0, x20, #0x3",
"cmp x0, #0x2 (2)",
"mov w2, #0x1",
"csel x1, x2, xzr, eq",
"cmp x0, #0x1 (1)",
"mov w2, #0x2",
"csel x1, x2, x1, eq",
"cmp x0, #0x3 (3)",
"mov w2, #0x3",
"csel x1, x2, x1, eq",
"rbit w1, w20",
"lsr w1, w1, #30",
"mrs x0, fpcr",
"bfi x0, x1, #22, #2",
"lsr x1, x20, #2",
@ -1717,25 +1691,15 @@
]
},
"stmxcsr [rax]": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 7,
"Optimal": "No",
"Comment": "GROUP15 0x0F 0xAE /3",
"ExpectedArm64ASM": [
"mov w20, #0x1f80",
"mrs x21, fpcr",
"lsr x21, x21, #22",
"and x0, x21, #0x3",
"cmp x0, #0x1 (1)",
"mov w2, #0x2",
"csel x1, x2, xzr, eq",
"cmp x0, #0x2 (2)",
"mov w2, #0x1",
"csel x1, x2, x1, eq",
"cmp x0, #0x3 (3)",
"mov w2, #0x3",
"csel x1, x2, x1, eq",
"orr x21, x21, x1",
"bfxil x21, x1, #0, #2",
"ubfx x21, x21, #22, #3",
"rbit w0, w21",
"bfi x21, x0, #30, #2",
"bfi w20, w21, #13, #3",
"str w20, [x4]"
]
@ -1758,7 +1722,7 @@
]
},
"xsave [rax]": {
"ExpectedInstructionCount": 81,
"ExpectedInstructionCount": 71,
"Optimal": "No",
"Comment": "GROUP15 0x0F 0xAE /4",
"ExpectedArm64ASM": [
@ -1820,22 +1784,12 @@
"str q31, [x21, #400]",
"ubfx x22, x20, #1, #2",
"cbnz x22, #+0x8",
"b #+0x54",
"b #+0x2c",
"mov w22, #0x1f80",
"mrs x23, fpcr",
"lsr x23, x23, #22",
"and x0, x23, #0x3",
"cmp x0, #0x1 (1)",
"mov w2, #0x2",
"csel x1, x2, xzr, eq",
"cmp x0, #0x2 (2)",
"mov w2, #0x1",
"csel x1, x2, x1, eq",
"cmp x0, #0x3 (3)",
"mov w2, #0x3",
"csel x1, x2, x1, eq",
"orr x23, x23, x1",
"bfxil x23, x1, #0, #2",
"ubfx x23, x23, #22, #3",
"rbit w0, w23",
"bfi x23, x0, #30, #2",
"bfi w22, w23, #13, #3",
"add x23, x21, #0x18 (24)",
"str w22, [x21, #24]",
@ -1854,7 +1808,7 @@
]
},
"xrstor [rax]": {
"ExpectedInstructionCount": 112,
"ExpectedInstructionCount": 104,
"Optimal": "No",
"Comment": "GROUP15 0x0F 0xAE /5",
"ExpectedArm64ASM": [
@ -1951,19 +1905,11 @@
"mov v31.16b, v16.16b",
"ubfx x21, x21, #1, #2",
"cbnz x21, #+0x8",
"b #+0x4c",
"b #+0x2c",
"ldr w20, [x20, #24]",
"ubfx w20, w20, #13, #3",
"and x0, x20, #0x3",
"cmp x0, #0x2 (2)",
"mov w2, #0x1",
"csel x1, x2, xzr, eq",
"cmp x0, #0x1 (1)",
"mov w2, #0x2",
"csel x1, x2, x1, eq",
"cmp x0, #0x3 (3)",
"mov w2, #0x3",
"csel x1, x2, x1, eq",
"rbit w1, w20",
"lsr w1, w1, #30",
"mrs x0, fpcr",
"bfi x0, x1, #22, #2",
"lsr x1, x20, #2",

View File

@ -750,7 +750,7 @@
]
},
"vldmxcsr [rax]": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 9,
"Optimal": "No",
"Comment": [
"Map group 15 0b010"
@ -758,16 +758,8 @@
"ExpectedArm64ASM": [
"ldr w20, [x4]",
"ubfx w20, w20, #13, #3",
"and x0, x20, #0x3",
"cmp x0, #0x2 (2)",
"mov w2, #0x1",
"csel x1, x2, xzr, eq",
"cmp x0, #0x1 (1)",
"mov w2, #0x2",
"csel x1, x2, x1, eq",
"cmp x0, #0x3 (3)",
"mov w2, #0x3",
"csel x1, x2, x1, eq",
"rbit w1, w20",
"lsr w1, w1, #30",
"mrs x0, fpcr",
"bfi x0, x1, #22, #2",
"lsr x1, x20, #2",
@ -776,7 +768,7 @@
]
},
"vstmxcsr [rax]": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 7,
"Optimal": "No",
"Comment": [
"Map group 15 0b011"
@ -784,111 +776,91 @@
"ExpectedArm64ASM": [
"mov w20, #0x1f80",
"mrs x21, fpcr",
"lsr x21, x21, #22",
"and x0, x21, #0x3",
"cmp x0, #0x1 (1)",
"mov w2, #0x2",
"csel x1, x2, xzr, eq",
"cmp x0, #0x2 (2)",
"mov w2, #0x1",
"csel x1, x2, x1, eq",
"cmp x0, #0x3 (3)",
"mov w2, #0x3",
"csel x1, x2, x1, eq",
"orr x21, x21, x1",
"bfxil x21, x1, #0, #2",
"ubfx x21, x21, #22, #3",
"rbit w0, w21",
"bfi x21, x0, #30, #2",
"bfi w20, w21, #13, #3",
"str w20, [x4]"
]
},
"blsr eax, ebx": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 10,
"Optimal": "No",
"Comment": [
"Map group 17 0b001 32-bit"
],
"ExpectedArm64ASM": [
"mov w20, #0x1",
"mov w21, w7",
"sub x22, x21, #0x1 (1)",
"and x22, x22, x21",
"mov w4, w22",
"mov w23, #0x0",
"tst w22, w22",
"mrs x22, nzcv",
"cmp x21, #0x0 (0)",
"csel x20, x23, x20, eq",
"orr w20, w22, w20, lsl #29",
"mov w20, w7",
"sub x21, x20, #0x1 (1)",
"and x21, x21, x20",
"mov w4, w21",
"tst w21, w21",
"mrs x21, nzcv",
"cmp x20, #0x0 (0)",
"cset x20, ne",
"orr w20, w21, w20, lsl #29",
"str w20, [x28, #728]"
]
},
"blsr rax, rbx": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 8,
"Optimal": "No",
"Comment": [
"Map group 17 0b001 64-bit"
],
"ExpectedArm64ASM": [
"mov w20, #0x1",
"sub x21, x7, #0x1 (1)",
"and x4, x21, x7",
"mov w21, #0x0",
"sub x20, x7, #0x1 (1)",
"and x4, x20, x7",
"tst x4, x4",
"mrs x22, nzcv",
"mrs x20, nzcv",
"cmp x7, #0x0 (0)",
"csel x20, x21, x20, eq",
"orr w20, w22, w20, lsl #29",
"cset x21, ne",
"orr w20, w20, w21, lsl #29",
"str w20, [x28, #728]"
]
},
"blsmsk eax, ebx": {
"ExpectedInstructionCount": 15,
"ExpectedInstructionCount": 13,
"Optimal": "No",
"Comment": [
"Map group 17 0b010 32-bit"
],
"ExpectedArm64ASM": [
"mov w20, #0x1",
"mov w21, w7",
"sub x22, x21, #0x1 (1)",
"eor x22, x22, x21",
"mov w4, w22",
"mov w22, #0x0",
"mov w23, #0x50000000",
"ldr w24, [x28, #728]",
"bic x23, x24, x23",
"cmp x21, #0x0 (0)",
"csel x20, x22, x20, eq",
"mov w0, w23",
"mov w20, w7",
"sub x21, x20, #0x1 (1)",
"eor x21, x21, x20",
"mov w4, w21",
"mov w21, #0x50000000",
"ldr w22, [x28, #728]",
"bic x21, x22, x21",
"cmp x20, #0x0 (0)",
"cset x20, ne",
"mov w0, w21",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"str w20, [x28, #728]"
]
},
"blsmsk rax, rbx": {
"ExpectedInstructionCount": 13,
"ExpectedInstructionCount": 9,
"Optimal": "No",
"Comment": [
"Map group 17 0b010 64-bit"
],
"ExpectedArm64ASM": [
"mov w20, #0x1",
"sub x21, x7, #0x1 (1)",
"eor x4, x21, x7",
"mov w21, #0x0",
"mov w22, #0x50000000",
"ldr w23, [x28, #728]",
"bic x22, x23, x22",
"sub x20, x7, #0x1 (1)",
"eor x4, x20, x7",
"mov w20, #0x50000000",
"ldr w21, [x28, #728]",
"bic x20, x21, x20",
"cmp x7, #0x0 (0)",
"csel x20, x21, x20, eq",
"mov w0, w22",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"cset x21, ne",
"bfi w20, w21, #29, #1",
"str w20, [x28, #728]"
]
},
"blsi eax, ebx": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 9,
"Optimal": "No",
"Comment": [
"Map group 17 0b011 32-bit"
@ -897,18 +869,16 @@
"mov w20, w7",
"neg w21, w20",
"and w4, w20, w21",
"mov w20, #0x0",
"mov w21, #0x1",
"tst w4, w4",
"mrs x22, nzcv",
"mrs x20, nzcv",
"cmp x4, #0x0 (0)",
"csel x20, x20, x21, eq",
"orr w20, w22, w20, lsl #29",
"cset x21, ne",
"orr w20, w20, w21, lsl #29",
"str w20, [x28, #728]"
]
},
"blsi rax, rbx": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 8,
"Optimal": "No",
"Comment": [
"Map group 17 0b011 64-bit"
@ -916,13 +886,11 @@
"ExpectedArm64ASM": [
"neg x20, x7",
"and x4, x7, x20",
"mov w20, #0x0",
"mov w21, #0x1",
"tst x4, x4",
"mrs x22, nzcv",
"mrs x20, nzcv",
"cmp x4, #0x0 (0)",
"csel x20, x20, x21, eq",
"orr w20, w22, w20, lsl #29",
"cset x21, ne",
"orr w20, w20, w21, lsl #29",
"str w20, [x28, #728]"
]
}