Merge pull request #3368 from bylaws/preprcr

FEXCore: Fix RCL/RCR shift wraparound behaviour
This commit is contained in:
Ryan Houdek 2024-01-21 13:44:49 -08:00 committed by GitHub
commit c0be974272
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 512 additions and 283 deletions

View File

@ -2421,63 +2421,74 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) {
return;
}
const auto Mask = (Size == 64) ? 0x3F : 0x1F;
// Calculate flags early.
CalculateDeferredFlags();
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);
OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags);
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags);
const auto OpSize = OpSizeFromSrc(Op);
CalculateFlags_ShiftVariable(Src, [this, CF, Op, Size, Src](){
const auto OpSize = OpSizeFromSrc(Op);
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags);
// Res = Src >> Shift
OrderedNode *Res = _Lshr(OpSize, Dest, Src);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);
// Res = Src >> Shift
OrderedNode *Res = _Lshr(OpSize, Dest, Src);
uint64_t Const;
if (IsValueConstant(WrapNode(Src), &Const)) {
Const &= Mask;
if (!Const)
return;
uint64_t Const;
if (!IsValueConstant(WrapNode(Src), &Const)) {
auto Zero = _Constant(Size, 0);
auto One = _Constant(Size, 1);
InvalidateDeferredFlags();
// Res |= (Src << (Size - Shift + 1));
OrderedNode *SrcShl = _Sub(OpSize, _Constant(Size, Size + 1), Src);
auto TmpHigher = _Lshl(OpSize, Dest, SrcShl);
// Constant folded version of the above, with fused shifts.
if (Const > 1)
Res = _Orlshl(OpSize, Res, Dest, Size + 1 - Const);
auto CompareResult = _Select(FEXCore::IR::COND_UGT, Src, One, TmpHigher, Zero);
Res = _Or(OpSize, Res, CompareResult);
// Our new CF will be bit (Shift - 1) of the source.
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Dest, Const - 1, true);
// Our new CF will be bit (Shift - 1) of the source. this is hoisted up to
// avoid the need to copy the source.
auto NewCF = _Lshr(OpSize, Dest, _Sub(OpSize, Src, One));
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(NewCF, 0, true);
// Since shift != 0 we can inject the CF
Res = _Orlshl(OpSize, Res, CF, Size - Const);
// Since shift != 0 we can inject the CF
OrderedNode *CFShl = _Sub(OpSize, _Constant(Size, Size), Src);
Res = _Or(OpSize, Res, _Lshl(OpSize::i64Bit, CF, CFShl));
// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
if (Const == 1) {
auto Xor = _XorShift(OpSize, Res, Res, ShiftType::LSR, 1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Xor, Size - 2, true);
} else {
// Constant folded version of the above, with fused shifts.
if (Const > 1)
Res = _Orlshl(OpSize, Res, Dest, Size + 1 - Const);
// Our new CF will be bit (Shift - 1) of the source.
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Dest, Const - 1, true);
// Since shift != 0 we can inject the CF
Res = _Orlshl(OpSize, Res, CF, Size - Const);
// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
if (Const == 1) {
auto Xor = _XorShift(OpSize, Res, Res, ShiftType::LSR, 1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Xor, Size - 2, true);
}
}
StoreResult(GPRClass, Op, Res, -1);
return;
}
OrderedNode *SrcMasked = _And(OpSize, Src, _Constant(Size, Mask));
CalculateFlags_ShiftVariable(SrcMasked, [this, CF, Op, Size, OpSize, SrcMasked, Dest, &Res](){
auto Zero = _Constant(Size, 0);
auto One = _Constant(Size, 1);
// Res |= (SrcMasked << (Size - Shift + 1));
OrderedNode *SrcMaskedShl = _Sub(OpSize, _Constant(Size, Size + 1), SrcMasked);
auto TmpHigher = _Lshl(OpSize, Dest, SrcMaskedShl);
auto CompareResult = _Select(FEXCore::IR::COND_UGT, SrcMasked, One, TmpHigher, Zero);
Res = _Or(OpSize, Res, CompareResult);
// Our new CF will be bit (Shift - 1) of the source. this is hoisted up to
// avoid the need to copy the source.
auto NewCF = _Lshr(OpSize, Dest, _Sub(OpSize, SrcMasked, One));
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(NewCF, 0, true);
// Since shift != 0 we can inject the CF
OrderedNode *CFShl = _Sub(OpSize, _Constant(Size, Size), SrcMasked);
Res = _Or(OpSize, Res, _Lshl(OpSize::i64Bit, CF, CFShl));
// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
auto Xor = _XorShift(OpSize, Res, Res, ShiftType::LSR, 1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Xor, Size - 2, true);
StoreResult(GPRClass, Op, Res, -1);
});
}
@ -2607,65 +2618,77 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) {
return;
}
const auto Mask = (Size == 64) ? 0x3F : 0x1F;
// Calculate flags early.
CalculateDeferredFlags();
OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags);
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags);
const auto OpSize = OpSizeFromSrc(Op);
// Res = Src << Shift
OrderedNode *Res = _Lshl(OpSize, Dest, Src);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);
CalculateFlags_ShiftVariable(Src, [this, CF, Op, Size, Src](){
// Res = Src << Shift
const auto OpSize = OpSizeFromSrc(Op);
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags);
OrderedNode *Res = _Lshl(OpSize, Dest, Src);
uint64_t Const;
if (IsValueConstant(WrapNode(Src), &Const)) {
Const &= Mask;
if (!Const)
return;
uint64_t Const;
if (!IsValueConstant(WrapNode(Src), &Const)) {
// Res |= (Src << (Size - Shift + 1));
OrderedNode *SrcShl = _Sub(OpSize, _Constant(Size, Size + 1), Src);
auto TmpHigher = _Lshr(OpSize, Dest, SrcShl);
InvalidateDeferredFlags();
auto One = _Constant(Size, 1);
auto Zero = _Constant(Size, 0);
// Res |= (Src << (Size - Shift + 1));
if (Const > 1)
Res = _Orlshr(OpSize, Res, Dest, Size + 1 - Const);
auto CompareResult = _Select(FEXCore::IR::COND_UGT, Src, One, TmpHigher, Zero);
Res = _Or(OpSize, Res, CompareResult);
// Our new CF will be bit (Shift - 1) of the source
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Dest, Size - Const, true);
// Our new CF will be bit (Shift - 1) of the source
auto NewCF = _Lshr(OpSize, Dest, _Sub(OpSize, _Constant(Size, Size), Src));
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(NewCF, 0, true);
// Since Shift != 0 we can inject the CF
Res = _Orlshl(OpSize, Res, CF, Const - 1);
// Since Shift != 0 we can inject the CF
OrderedNode *CFShl = _Sub(OpSize, Src, _Constant(Size, 1));
auto TmpCF = _Lshl(OpSize::i64Bit, CF, CFShl);
Res = _Or(OpSize, Res, TmpCF);
// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
//
// Note that NewCF has garbage in the upper bits, but we ignore them here
// and mask as part of the set after.
auto NewOF = _XorShift(OpSize, Res, NewCF, ShiftType::LSL, Size - 1);
// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
if (Const == 1) {
auto NewOF = _Xor(OpSize, Res, Dest);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, Size - 1, true);
} else {
// Res |= (Src << (Size - Shift + 1));
if (Const > 1)
Res = _Orlshr(OpSize, Res, Dest, Size + 1 - Const);
// Our new CF will be bit (Shift - 1) of the source
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Dest, Size - Const, true);
// Since Shift != 0 we can inject the CF
Res = _Orlshl(OpSize, Res, CF, Const - 1);
// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
if (Const == 1) {
auto NewOF = _Xor(OpSize, Res, Dest);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, Size - 1, true);
}
}
StoreResult(GPRClass, Op, Res, -1);
return;
}
OrderedNode *SrcMasked = _And(OpSize, Src, _Constant(Size, Mask));
CalculateFlags_ShiftVariable(SrcMasked, [this, CF, Op, Size, OpSize, SrcMasked, Dest, &Res](){
// Res |= (SrcMasked << (Size - Shift + 1));
OrderedNode *SrcMaskedShl = _Sub(OpSize, _Constant(Size, Size + 1), SrcMasked);
auto TmpHigher = _Lshr(OpSize, Dest, SrcMaskedShl);
auto One = _Constant(Size, 1);
auto Zero = _Constant(Size, 0);
auto CompareResult = _Select(FEXCore::IR::COND_UGT, SrcMasked, One, TmpHigher, Zero);
Res = _Or(OpSize, Res, CompareResult);
// Our new CF will be bit (Shift - 1) of the source
auto NewCF = _Lshr(OpSize, Dest, _Sub(OpSize, _Constant(Size, Size), SrcMasked));
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(NewCF, 0, true);
// Since Shift != 0 we can inject the CF
OrderedNode *CFShl = _Sub(OpSize, SrcMasked, _Constant(Size, 1));
auto TmpCF = _Lshl(OpSize::i64Bit, CF, CFShl);
Res = _Or(OpSize, Res, TmpCF);
// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
//
// Note that NewCF has garbage in the upper bits, but we ignore them here
// and mask as part of the set after.
auto NewOF = _XorShift(OpSize, Res, NewCF, ShiftType::LSL, Size - 1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, Size - 1, true);
StoreResult(GPRClass, Op, Res, -1);
});
}

View File

@ -0,0 +1,50 @@
%ifdef CONFIG
{
"RegData": {
"RBX": "0x00000006",
"RDI": "0x00000004",
"RDX": "0x00000002",
"RSI": "0x00000000",
"R8": "0x0",
"R9": "0x0",
"R10": "0x1",
"R11": "0x1"
}
}
%endif
; RCL (rotate left through carry) on 32-bit operands with a count of 34 in CL.
; The count is larger than the operand width; the RegData values above are the
; ones produced by a rotate-through-carry by 2 (34 & 0x1F), i.e. the count
; wraps around instead of being applied literally.
;
; Four cases are run: operand 0x00000001 and 0x40000000, each with CF set and
; with CF clear on entry. After each rotate the resulting carry flag is
; captured into R8..R11 (one register per case).
mov rbx, 0x00000001
mov rdi, 0x00000001
mov rdx, 0x40000000
mov rsi, 0x40000000
mov rcx, 34 ; Count wider than the 32-bit operand: tests count wraparound
; Case 1: operand 0x00000001, CF = 1 on entry -> expect EBX = 6, CF = 0
stc
rcl ebx, cl
lahf
; Only the low 16 bits of R8 are written here; bit 8 of AX is bit 0 of AH,
; which is CF after LAHF. The final AND discards every other bit.
mov r8w, ax
shr r8, 8
and r8, 1 ; Keep only the carry flag
; Case 2: operand 0x00000001, CF = 0 on entry -> expect EDI = 4, CF = 0
clc
rcl edi, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; Keep only the carry flag
; Case 3: operand 0x40000000, CF = 1 on entry -> expect EDX = 2, CF = 1
stc
rcl edx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; Keep only the carry flag
; Case 4: operand 0x40000000, CF = 0 on entry -> expect ESI = 0, CF = 1
clc
rcl esi, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; Keep only the carry flag
hlt

View File

@ -0,0 +1,50 @@
%ifdef CONFIG
{
"RegData": {
"RBX": "0x00000001",
"RDI": "0x00000001",
"RDX": "0x40000000",
"RSI": "0x40000000",
"R8": "0x1",
"R9": "0x0",
"R10": "0x1",
"R11": "0x0"
}
}
%endif
; RCL (rotate left through carry) on 32-bit operands with a count of 32 in CL.
; 32 & 0x1F is 0, so the rotate must behave as a zero-count rotate: the
; RegData values above expect every operand to be unchanged and the carry flag
; to keep whatever value STC/CLC gave it just before the rotate.
;
; Four cases are run: operand 0x00000001 and 0x40000000, each with CF set and
; with CF clear on entry. The carry flag observed after each rotate is
; captured into R8..R11 (one register per case).
mov rbx, 0x00000001
mov rdi, 0x00000001
mov rdx, 0x40000000
mov rsi, 0x40000000
mov rcx, 32 ; Count equal to the operand width: wraps around to a zero shift
; Case 1: operand 0x00000001, CF = 1 on entry -> expect EBX unchanged, CF = 1
stc
rcl ebx, cl
lahf
; Only the low 16 bits of R8 are written here; bit 8 of AX is bit 0 of AH,
; which is CF after LAHF. The final AND discards every other bit.
mov r8w, ax
shr r8, 8
and r8, 1 ; Keep only the carry flag
; Case 2: operand 0x00000001, CF = 0 on entry -> expect EDI unchanged, CF = 0
clc
rcl edi, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; Keep only the carry flag
; Case 3: operand 0x40000000, CF = 1 on entry -> expect EDX unchanged, CF = 1
stc
rcl edx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; Keep only the carry flag
; Case 4: operand 0x40000000, CF = 0 on entry -> expect ESI unchanged, CF = 0
clc
rcl esi, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; Keep only the carry flag
hlt

View File

@ -0,0 +1,50 @@
%ifdef CONFIG
{
"RegData": {
"RBX": "0x40000000",
"RDI": "0x00000000",
"RDX": "0x60000000",
"RSI": "0x20000000",
"R8": "0x1",
"R9": "0x1",
"R10": "0x0",
"R11": "0x0"
}
}
%endif
; RCR (rotate right through carry) on 32-bit operands with a count of 34 in CL.
; The count is larger than the operand width; the RegData values above are the
; ones produced by a rotate-through-carry by 2 (34 & 0x1F), i.e. the count
; wraps around instead of being applied literally.
;
; Four cases are run: operand 0x00000002 and 0x80000000, each with CF set and
; with CF clear on entry. After each rotate the resulting carry flag is
; captured into R8..R11 (one register per case).
mov rbx, 0x00000002
mov rdi, 0x00000002
mov rdx, 0x80000000
mov rsi, 0x80000000
mov rcx, 34 ; Count wider than the 32-bit operand: tests count wraparound
; Case 1: operand 0x00000002, CF = 1 on entry -> expect EBX = 0x40000000, CF = 1
stc
rcr ebx, cl
lahf
; Only the low 16 bits of R8 are written here; bit 8 of AX is bit 0 of AH,
; which is CF after LAHF. The final AND discards every other bit.
mov r8w, ax
shr r8, 8
and r8, 1 ; Keep only the carry flag
; Case 2: operand 0x00000002, CF = 0 on entry -> expect EDI = 0, CF = 1
clc
rcr edi, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; Keep only the carry flag
; Case 3: operand 0x80000000, CF = 1 on entry -> expect EDX = 0x60000000, CF = 0
stc
rcr edx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; Keep only the carry flag
; Case 4: operand 0x80000000, CF = 0 on entry -> expect ESI = 0x20000000, CF = 0
clc
rcr esi, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; Keep only the carry flag
hlt

View File

@ -0,0 +1,50 @@
%ifdef CONFIG
{
"RegData": {
"RBX": "0x00000002",
"RDI": "0x00000002",
"RDX": "0x80000000",
"RSI": "0x80000000",
"R8": "0x1",
"R9": "0x0",
"R10": "0x1",
"R11": "0x0"
}
}
%endif
; RCR (rotate right through carry) on 32-bit operands with a count of 32 in CL.
; 32 & 0x1F is 0, so the rotate must behave as a zero-count rotate: the
; RegData values above expect every operand to be unchanged and the carry flag
; to keep whatever value STC/CLC gave it just before the rotate.
;
; Four cases are run: operand 0x00000002 and 0x80000000, each with CF set and
; with CF clear on entry. The carry flag observed after each rotate is
; captured into R8..R11 (one register per case).
mov rbx, 0x00000002
mov rdi, 0x00000002
mov rdx, 0x80000000
mov rsi, 0x80000000
mov rcx, 32 ; Count equal to the operand width: wraps around to a zero shift
; Case 1: operand 0x00000002, CF = 1 on entry -> expect EBX unchanged, CF = 1
stc
rcr ebx, cl
lahf
; Only the low 16 bits of R8 are written here; bit 8 of AX is bit 0 of AH,
; which is CF after LAHF. The final AND discards every other bit.
mov r8w, ax
shr r8, 8
and r8, 1 ; Keep only the carry flag
; Case 2: operand 0x00000002, CF = 0 on entry -> expect EDI unchanged, CF = 0
clc
rcr edi, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; Keep only the carry flag
; Case 3: operand 0x80000000, CF = 1 on entry -> expect EDX unchanged, CF = 1
stc
rcr edx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; Keep only the carry flag
; Case 4: operand 0x80000000, CF = 0 on entry -> expect ESI unchanged, CF = 0
clc
rcr esi, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; Keep only the carry flag
hlt

View File

@ -1230,23 +1230,23 @@
"ExpectedInstructionCount": 6,
"Comment": "GROUP2 0xC1 /2",
"ExpectedArm64ASM": [
"cset w20, hs",
"mov w21, w4",
"lsl w22, w21, #2",
"orr w22, w22, w21, lsr #31",
"rmif x21, #29, #nzCv",
"orr w4, w22, w20, lsl #1"
"mov w20, w4",
"lsl w21, w20, #2",
"cset w22, hs",
"orr w21, w21, w20, lsr #31",
"rmif x20, #29, #nzCv",
"orr w4, w21, w22, lsl #1"
]
},
"rcl rax, 2": {
"ExpectedInstructionCount": 5,
"Comment": "GROUP2 0xC1 /2",
"ExpectedArm64ASM": [
"cset w20, hs",
"lsl x21, x4, #2",
"orr x21, x21, x4, lsr #63",
"lsl x20, x4, #2",
"cset w21, hs",
"orr x20, x20, x4, lsr #63",
"rmif x4, #61, #nzCv",
"orr x4, x21, x20, lsl #1"
"orr x4, x20, x21, lsl #1"
]
},
"rcr ax, 2": {
@ -1278,23 +1278,23 @@
"ExpectedInstructionCount": 6,
"Comment": "GROUP2 0xC1 /3",
"ExpectedArm64ASM": [
"cset w20, hs",
"mov w21, w4",
"lsr w22, w21, #2",
"orr w22, w22, w21, lsl #31",
"rmif x21, #0, #nzCv",
"orr w4, w22, w20, lsl #30"
"mov w20, w4",
"lsr w21, w20, #2",
"cset w22, hs",
"orr w21, w21, w20, lsl #31",
"rmif x20, #0, #nzCv",
"orr w4, w21, w22, lsl #30"
]
},
"rcr rax, 2": {
"ExpectedInstructionCount": 5,
"Comment": "GROUP2 0xC1 /3",
"ExpectedArm64ASM": [
"cset w20, hs",
"lsr x21, x4, #2",
"orr x21, x21, x4, lsl #63",
"lsr x20, x4, #2",
"cset w21, hs",
"orr x20, x20, x4, lsl #63",
"rmif x4, #0, #nzCv",
"orr x4, x21, x20, lsl #62"
"orr x4, x20, x21, lsl #62"
]
},
"shl ax, 2": {
@ -2057,59 +2057,61 @@
]
},
"rcl eax, cl": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 24,
"Comment": "GROUP2 0xd3 /2",
"ExpectedArm64ASM": [
"mov w20, w5",
"cset w21, hs",
"cbz x20, #+0x54",
"mov w22, w4",
"lsl w23, w22, w20",
"mov w21, w4",
"lsl w22, w21, w20",
"cset w23, hs",
"and w20, w20, #0x1f",
"cbz x20, #+0x4c",
"mov w24, #0x21",
"sub w24, w24, w20",
"lsr w24, w22, w24",
"lsr w24, w21, w24",
"mov w25, #0x0",
"mrs x30, nzcv",
"cmp w20, #0x1 (1)",
"csel w24, w24, w25, hi",
"orr w23, w23, w24",
"orr w22, w22, w24",
"mov w24, #0x20",
"sub w24, w24, w20",
"lsr w22, w22, w24",
"lsr w21, w21, w24",
"msr nzcv, x30",
"rmif x22, #63, #nzCv",
"rmif x21, #63, #nzCv",
"sub w20, w20, #0x1 (1)",
"lsl x20, x21, x20",
"orr w4, w23, w20",
"eor w20, w4, w22, lsl #31",
"lsl x20, x23, x20",
"orr w4, w22, w20",
"eor w20, w4, w21, lsl #31",
"rmif x20, #31, #nzcV"
]
},
"rcl rax, cl": {
"ExpectedInstructionCount": 22,
"ExpectedInstructionCount": 23,
"Comment": "GROUP2 0xd3 /2",
"ExpectedArm64ASM": [
"mov x20, x5",
"cset w21, hs",
"cbz x20, #+0x50",
"lsl x22, x4, x20",
"mov w23, #0x41",
"sub x23, x23, x20",
"lsr x23, x4, x23",
"mov w24, #0x0",
"mrs x25, nzcv",
"cmp x20, #0x1 (1)",
"csel x23, x23, x24, hi",
"orr x22, x22, x23",
"mov w23, #0x40",
"sub x23, x23, x20",
"lsr x23, x4, x23",
"msr nzcv, x25",
"rmif x23, #63, #nzCv",
"sub x20, x20, #0x1 (1)",
"lsl x20, x21, x20",
"orr x4, x22, x20",
"eor x20, x4, x23, lsl #63",
"mov x20, x4",
"lsl x21, x20, x5",
"cset w22, hs",
"and x23, x5, #0x3f",
"cbz x23, #+0x4c",
"mov w24, #0x41",
"sub x24, x24, x23",
"lsr x24, x20, x24",
"mov w25, #0x0",
"mrs x30, nzcv",
"cmp x23, #0x1 (1)",
"csel x24, x24, x25, hi",
"orr x21, x21, x24",
"mov w24, #0x40",
"sub x24, x24, x23",
"lsr x20, x20, x24",
"msr nzcv, x30",
"rmif x20, #63, #nzCv",
"sub x23, x23, #0x1 (1)",
"lsl x22, x22, x23",
"orr x4, x21, x22",
"eor x20, x4, x20, lsl #63",
"rmif x20, #63, #nzcV"
]
},
@ -2141,58 +2143,60 @@
]
},
"rcr eax, cl": {
"ExpectedInstructionCount": 23,
"ExpectedInstructionCount": 24,
"Comment": "GROUP2 0xd3 /3",
"ExpectedArm64ASM": [
"cset w20, hs",
"mov w21, w5",
"cbz x21, #+0x54",
"mov w22, w4",
"lsr w23, w22, w21",
"mov w20, w5",
"mov w21, w4",
"lsr w22, w21, w20",
"cset w23, hs",
"and w20, w20, #0x1f",
"cbz x20, #+0x4c",
"mov w24, #0x0",
"mov w25, #0x21",
"sub w25, w25, w21",
"lsl w25, w22, w25",
"sub w25, w25, w20",
"lsl w25, w21, w25",
"mrs x30, nzcv",
"cmp w21, #0x1 (1)",
"cmp w20, #0x1 (1)",
"csel w24, w25, w24, hi",
"orr w23, w23, w24",
"sub w24, w21, #0x1 (1)",
"lsr w22, w22, w24",
"orr w22, w22, w24",
"sub w24, w20, #0x1 (1)",
"lsr w21, w21, w24",
"msr nzcv, x30",
"rmif x22, #63, #nzCv",
"mov w22, #0x20",
"sub w21, w22, w21",
"lsl x20, x20, x21",
"orr w4, w23, w20",
"rmif x21, #63, #nzCv",
"mov w21, #0x20",
"sub w20, w21, w20",
"lsl x20, x23, x20",
"orr w4, w22, w20",
"eor w20, w4, w4, lsr #1",
"rmif x20, #30, #nzcV"
]
},
"rcr rax, cl": {
"ExpectedInstructionCount": 22,
"ExpectedInstructionCount": 23,
"Comment": "GROUP2 0xd3 /3",
"ExpectedArm64ASM": [
"cset w20, hs",
"mov x21, x5",
"cbz x21, #+0x50",
"lsr x22, x4, x21",
"mov w23, #0x0",
"mov w24, #0x41",
"sub x24, x24, x21",
"lsl x24, x4, x24",
"mrs x25, nzcv",
"cmp x21, #0x1 (1)",
"csel x23, x24, x23, hi",
"orr x22, x22, x23",
"sub x23, x21, #0x1 (1)",
"lsr x23, x4, x23",
"msr nzcv, x25",
"rmif x23, #63, #nzCv",
"mov w23, #0x40",
"sub x21, x23, x21",
"lsl x20, x20, x21",
"orr x4, x22, x20",
"mov x20, x4",
"lsr x21, x20, x5",
"cset w22, hs",
"and x23, x5, #0x3f",
"cbz x23, #+0x4c",
"mov w24, #0x0",
"mov w25, #0x41",
"sub x25, x25, x23",
"lsl x25, x20, x25",
"mrs x30, nzcv",
"cmp x23, #0x1 (1)",
"csel x24, x25, x24, hi",
"orr x21, x21, x24",
"sub x24, x23, #0x1 (1)",
"lsr x20, x20, x24",
"msr nzcv, x30",
"rmif x20, #63, #nzCv",
"mov w20, #0x40",
"sub x20, x20, x23",
"lsl x20, x22, x20",
"orr x4, x21, x20",
"eor x20, x4, x4, lsr #1",
"rmif x20, #62, #nzcV"
]

View File

@ -1388,26 +1388,26 @@
"ExpectedInstructionCount": 8,
"Comment": "GROUP2 0xC1 /2",
"ExpectedArm64ASM": [
"cset w20, hs",
"mov w21, w4",
"lsl w22, w21, #2",
"orr w22, w22, w21, lsr #31",
"ubfx x21, x21, #30, #1",
"lsl x21, x21, #29",
"orr w4, w22, w20, lsl #1",
"msr nzcv, x21"
"mov w20, w4",
"lsl w21, w20, #2",
"cset w22, hs",
"orr w21, w21, w20, lsr #31",
"ubfx x20, x20, #30, #1",
"lsl x20, x20, #29",
"orr w4, w21, w22, lsl #1",
"msr nzcv, x20"
]
},
"rcl rax, 2": {
"ExpectedInstructionCount": 7,
"Comment": "GROUP2 0xC1 /2",
"ExpectedArm64ASM": [
"cset w20, hs",
"lsl x21, x4, #2",
"orr x21, x21, x4, lsr #63",
"lsl x20, x4, #2",
"cset w21, hs",
"orr x20, x20, x4, lsr #63",
"ubfx x22, x4, #62, #1",
"lsl x22, x22, #29",
"orr x4, x21, x20, lsl #1",
"orr x4, x20, x21, lsl #1",
"msr nzcv, x22"
]
},
@ -1442,26 +1442,26 @@
"ExpectedInstructionCount": 8,
"Comment": "GROUP2 0xC1 /3",
"ExpectedArm64ASM": [
"cset w20, hs",
"mov w21, w4",
"lsr w22, w21, #2",
"orr w22, w22, w21, lsl #31",
"ubfx x21, x21, #1, #1",
"lsl x21, x21, #29",
"orr w4, w22, w20, lsl #30",
"msr nzcv, x21"
"mov w20, w4",
"lsr w21, w20, #2",
"cset w22, hs",
"orr w21, w21, w20, lsl #31",
"ubfx x20, x20, #1, #1",
"lsl x20, x20, #29",
"orr w4, w21, w22, lsl #30",
"msr nzcv, x20"
]
},
"rcr rax, 2": {
"ExpectedInstructionCount": 7,
"Comment": "GROUP2 0xC1 /3",
"ExpectedArm64ASM": [
"cset w20, hs",
"lsr x21, x4, #2",
"orr x21, x21, x4, lsl #63",
"lsr x20, x4, #2",
"cset w21, hs",
"orr x20, x20, x4, lsl #63",
"ubfx x22, x4, #1, #1",
"lsl x22, x22, #29",
"orr x4, x21, x20, lsl #62",
"orr x4, x20, x21, lsl #62",
"msr nzcv, x22"
]
},
@ -2449,33 +2449,34 @@
]
},
"rcl eax, cl": {
"ExpectedInstructionCount": 29,
"ExpectedInstructionCount": 30,
"Comment": "GROUP2 0xd3 /2",
"ExpectedArm64ASM": [
"mov w20, w5",
"cset w21, hs",
"cbz x20, #+0x6c",
"mov w22, w4",
"lsl w23, w22, w20",
"mov w21, w4",
"lsl w22, w21, w20",
"cset w23, hs",
"and w20, w20, #0x1f",
"cbz x20, #+0x64",
"mov w24, #0x21",
"sub w24, w24, w20",
"lsr w24, w22, w24",
"lsr w24, w21, w24",
"mov w25, #0x0",
"mrs x30, nzcv",
"cmp w20, #0x1 (1)",
"csel w24, w24, w25, hi",
"orr w23, w23, w24",
"orr w22, w22, w24",
"mov w24, #0x20",
"sub w24, w24, w20",
"lsr w22, w22, w24",
"ubfx x24, x22, #0, #1",
"lsr w21, w21, w24",
"ubfx x24, x21, #0, #1",
"mov w0, w30",
"bfi w0, w24, #29, #1",
"mov w24, w0",
"sub w20, w20, #0x1 (1)",
"lsl x20, x21, x20",
"orr w4, w23, w20",
"eor w20, w4, w22, lsl #31",
"lsl x20, x23, x20",
"orr w4, w22, w20",
"eor w20, w4, w21, lsl #31",
"ubfx x20, x20, #31, #1",
"mov w0, w24",
"bfi w0, w20, #28, #1",
@ -2484,32 +2485,33 @@
]
},
"rcl rax, cl": {
"ExpectedInstructionCount": 28,
"ExpectedInstructionCount": 29,
"Comment": "GROUP2 0xd3 /2",
"ExpectedArm64ASM": [
"mov x20, x5",
"cset w21, hs",
"cbz x20, #+0x68",
"lsl x22, x4, x20",
"mov w23, #0x41",
"sub x23, x23, x20",
"lsr x23, x4, x23",
"mov w24, #0x0",
"mrs x25, nzcv",
"cmp x20, #0x1 (1)",
"csel x23, x23, x24, hi",
"orr x22, x22, x23",
"mov w23, #0x40",
"sub x23, x23, x20",
"lsr x23, x4, x23",
"ubfx x24, x23, #0, #1",
"mov w0, w25",
"mov x20, x4",
"lsl x21, x20, x5",
"cset w22, hs",
"and x23, x5, #0x3f",
"cbz x23, #+0x64",
"mov w24, #0x41",
"sub x24, x24, x23",
"lsr x24, x20, x24",
"mov w25, #0x0",
"mrs x30, nzcv",
"cmp x23, #0x1 (1)",
"csel x24, x24, x25, hi",
"orr x21, x21, x24",
"mov w24, #0x40",
"sub x24, x24, x23",
"lsr x20, x20, x24",
"ubfx x24, x20, #0, #1",
"mov w0, w30",
"bfi w0, w24, #29, #1",
"mov w24, w0",
"sub x20, x20, #0x1 (1)",
"lsl x20, x21, x20",
"orr x4, x22, x20",
"eor x20, x4, x23, lsl #63",
"sub x23, x23, #0x1 (1)",
"lsl x22, x22, x23",
"orr x4, x21, x22",
"eor x20, x4, x20, lsl #63",
"lsr x20, x20, #63",
"mov w0, w24",
"bfi w0, w20, #28, #1",
@ -2547,71 +2549,71 @@
]
},
"rcr eax, cl": {
"ExpectedInstructionCount": 29,
"ExpectedInstructionCount": 30,
"Comment": "GROUP2 0xd3 /3",
"ExpectedArm64ASM": [
"cset w20, hs",
"mov w21, w5",
"cbz x21, #+0x6c",
"mov w22, w4",
"lsr w23, w22, w21",
"mov w20, w5",
"mov w21, w4",
"lsr w22, w21, w20",
"cset w23, hs",
"and w20, w20, #0x1f",
"cbz x20, #+0x64",
"mov w24, #0x0",
"mov w25, #0x21",
"sub w25, w25, w21",
"lsl w25, w22, w25",
"sub w25, w25, w20",
"lsl w25, w21, w25",
"mrs x30, nzcv",
"cmp w21, #0x1 (1)",
"cmp w20, #0x1 (1)",
"csel w24, w25, w24, hi",
"orr w23, w23, w24",
"sub w24, w21, #0x1 (1)",
"lsr w22, w22, w24",
"ubfx x22, x22, #0, #1",
"orr w22, w22, w24",
"sub w24, w20, #0x1 (1)",
"lsr w21, w21, w24",
"ubfx x21, x21, #0, #1",
"mov w0, w30",
"bfi w0, w22, #29, #1",
"mov w22, w0",
"bfi w0, w21, #29, #1",
"mov w21, w0",
"mov w24, #0x20",
"sub w21, w24, w21",
"lsl x20, x20, x21",
"orr w4, w23, w20",
"sub w20, w24, w20",
"lsl x20, x23, x20",
"orr w4, w22, w20",
"eor w20, w4, w4, lsr #1",
"ubfx x20, x20, #30, #1",
"mov w0, w22",
"mov w0, w21",
"bfi w0, w20, #28, #1",
"mov w20, w0",
"msr nzcv, x20"
]
},
"rcr rax, cl": {
"ExpectedInstructionCount": 28,
"ExpectedInstructionCount": 27,
"Comment": "GROUP2 0xd3 /3",
"ExpectedArm64ASM": [
"cset w20, hs",
"mov x21, x5",
"cbz x21, #+0x68",
"lsr x22, x4, x21",
"mov w23, #0x0",
"mov w24, #0x41",
"sub x24, x24, x21",
"lsl x24, x4, x24",
"mrs x25, nzcv",
"cmp x21, #0x1 (1)",
"csel x23, x24, x23, hi",
"orr x22, x22, x23",
"sub x23, x21, #0x1 (1)",
"lsr x23, x4, x23",
"ubfx x23, x23, #0, #1",
"mov w0, w25",
"bfi w0, w23, #29, #1",
"mov w23, w0",
"mov w24, #0x40",
"sub x21, x24, x21",
"lsl x20, x20, x21",
"orr x4, x22, x20",
"eor x20, x4, x4, lsr #1",
"ubfx x20, x20, #62, #1",
"mov w0, w23",
"bfi w0, w20, #28, #1",
"mov x20, x4",
"lsr x21, x20, x5",
"cset w22, hs",
"and x23, x5, #0x3f",
"cbz x23, #+0x5c",
"mov w24, #0x0",
"mov w25, #0x41",
"sub x25, x25, x23",
"lsl x25, x20, x25",
"mrs x30, nzcv",
"cmp x23, #0x1 (1)",
"csel x24, x25, x24, hi",
"orr x21, x21, x24",
"sub x24, x23, #0x1 (1)",
"lsr x20, x20, x24",
"ubfx x20, x20, #0, #1",
"mov w0, w30",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"mov w24, #0x40",
"sub x23, x24, x23",
"lsl x22, x22, x23",
"orr x4, x21, x22",
"eor x21, x4, x4, lsr #1",
"ubfx x21, x21, #62, #1",
"bfi w20, w21, #28, #1",
"msr nzcv, x20"
]
},