OpcodeDispatcher: Handle RORX corner cases better

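There are a few cases where we were emitting code when we
didn't really need to, or could emit less: when the rotation
amount is zero or equal to the operand width, no rotate is
needed, and when the source and destination are also the same
full-width register, nothing needs to be emitted at all.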
This commit is contained in:
Lioncache 2023-09-15 15:28:08 -04:00
parent 6e08ac65b9
commit 4a37ea4819
3 changed files with 76 additions and 17 deletions

View File

@ -2678,12 +2678,32 @@ void OpDispatchBuilder::BZHI(OpcodeArgs) {
}
// Emits IR for RORX: rotate Src[0] right by the immediate in Src[1] and store
// the result to the destination.
//
// Src[1] must be a literal; this is asserted. Degenerate cases are trimmed:
//  - an effective rotation of zero stores the source without emitting a rotate;
//  - if, in addition, source and destination are the same GPR and the operand
//    is as wide as a GPR, nothing is emitted at all.
void OpDispatchBuilder::RORX(OpcodeArgs) {
  LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src[1] needs to be literal here");

  const auto SrcSize = GetSrcSize(Op);
  const auto SrcSizeBits = SrcSize * 8;
  const auto GPRSize = CTX->GetGPRSize();

  // The hardware masks the immediate to the operand width (imm8 & 31 for
  // 32-bit operands, imm8 & 63 for 64-bit operands), so an amount such as 33
  // on a 32-bit operand is a rotate by 1, not a no-op. Mask here so that both
  // the "is there anything to rotate" decision and the emitted rotate use the
  // effective amount.
  // NOTE(review): relies on SrcSizeBits being a power of two (32 or 64 for
  // RORX) -- confirm no other operand size can reach this handler.
  const uint64_t Amount = Op->Src[1].Data.Literal.Value & (SrcSizeBits - 1);
  const bool DoRotation = Amount != 0;

  const bool IsSameGPR = Op->Src[0].IsGPR() && Op->Dest.IsGPR() &&
                         Op->Src[0].Data.GPR.GPR == Op->Dest.Data.GPR.GPR;
  const bool SrcSizeIsGPRSize = SrcSize == GPRSize;

  // If we don't need to rotate and our source is the same as the destination
  // then we don't need to do anything at all. We still need to be careful,
  // since 32-bit operations in 64-bit mode still need to zero-extend the
  // destination register. So also compare source size and GPR size.
  //
  // Very unlikely, but hey, we can do nothing faster.
  if (!DoRotation && IsSameGPR && SrcSizeIsGPRSize) [[unlikely]] {
    return;
  }

  auto* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, -1);

  // With no effective rotation the source value is stored as-is; the store
  // still happens so that a narrower-than-GPR write reaches the destination.
  auto* Result = Src;
  if (DoRotation) [[likely]] {
    Result = _Ror(OpSizeFromSrc(Op), Src, _Constant(Amount));
  }

  StoreResult(GPRClass, Op, Result, -1);
}

View File

@ -6,7 +6,8 @@
"RCX": "0xF00000000000000F",
"RDX": "0x80000000",
"RSI": "0xFF",
"RDI": "0xF000000F"
"RDI": "0xF000000F",
"R8": "0"
},
"HostFeatures": ["BMI2"]
}
@ -36,4 +37,8 @@ rorx edi, esi, 4,
; Test that we mask the rotation amount above the operand size (should leave edi's value alone).
rorx edi, edi, 32
; Zero-extending behavior
mov r8, 0xFFFFFFFF00000000
rorx r8d, r8d, 0
hlt

View File

@ -5415,14 +5415,23 @@
]
},
"rorx eax, ebx, 0": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"Map 3 0b11 0xf0 32-bit"
],
"ExpectedArm64ASM": [
"lsr w20, w7, #0",
"ror w4, w20, #0"
"lsr w4, w7, #0"
]
},
"rorx eax, eax, 0": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"Map 3 0b11 0xf0 32-bit"
],
"ExpectedArm64ASM": [
"lsr w4, w4, #0"
]
},
"rorx eax, ebx, 31": {
@ -5437,14 +5446,23 @@
]
},
"rorx eax, ebx, 32": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"Map 3 0b11 0xf0 32-bit"
],
"ExpectedArm64ASM": [
"lsr w20, w7, #0",
"ror w4, w20, #0"
"lsr w4, w7, #0"
]
},
"rorx eax, eax, 32": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"Map 3 0b11 0xf0 32-bit"
],
"ExpectedArm64ASM": [
"lsr w4, w4, #0"
]
},
"rorx rax, rbx, 0": {
@ -5454,9 +5472,17 @@
"Map 3 0b11 0xf0 64-bit"
],
"ExpectedArm64ASM": [
"ror x4, x7, #0"
"mov x4, x7"
]
},
"rorx rax, rax, 0": {
"ExpectedInstructionCount": 0,
"Optimal": "Yes",
"Comment": [
"Map 3 0b11 0xf0 64-bit"
],
"ExpectedArm64ASM": []
},
"rorx rax, rbx, 63": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
@ -5469,13 +5495,21 @@
},
"rorx rax, rbx, 64": {
"ExpectedInstructionCount": 1,
"Optimal": "No",
"Optimal": "Yes",
"Comment": [
"Map 3 0b11 0xf0 64-bit"
],
"ExpectedArm64ASM": [
"ror x4, x7, #0"
"mov x4, x7"
]
},
"rorx rax, rax, 64": {
"ExpectedInstructionCount": 0,
"Optimal": "Yes",
"Comment": [
"Map 3 0b11 0xf0 64-bit"
],
"ExpectedArm64ASM": []
}
}
}