mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-12-15 18:08:35 +00:00
Merge pull request #3530 from alyssarosenzweig/opt/cmpxchg-flags2
Optimize cmpxchg with flagm
This commit is contained in:
commit
2a625a467b
@ -5,6 +5,7 @@ tags: backend|arm64
|
||||
$end_info$
|
||||
*/
|
||||
|
||||
#include "FEXCore/IR/IR.h"
|
||||
#include "Interface/Context/Context.h"
|
||||
#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
|
||||
#include "Interface/Core/ArchHelpers/CodeEmitter/Registers.h"
|
||||
@ -299,6 +300,31 @@ DEF_OP(SubNZCV) {
|
||||
}
|
||||
}
|
||||
|
||||
// Compares two GPR pairs and updates only the Z flag: Z ends up set when both
// halves of Src1 equal the corresponding halves of Src2, and clear otherwise.
// N, C and V are saved before the comparison and put back afterwards, so the
// op does not disturb them.
DEF_OP(CmpPairZ) {
  auto Op = IROp->C<IR::IROp_CmpPairZ>();
  const uint8_t OpSize = IROp->Size;

  // NOTE(review): confirm IROp->Size is populated for this op. The expected-asm
  // fixtures for cmpxchg16b show 32-bit compares, which suggests the 64-bit
  // case is not being selected here.
  const auto EmitSize = OpSize == IR::i64Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;

  // Save NZCV. MRS places N/Z/C/V in bits 31/30/29/28 of TMP1.
  mrs(TMP1, ARMEmitter::SystemRegister::NZCV);

  // Compare, setting Z and clobbering N, C and V (NzCV).
  const auto Src1 = GetRegPair(Op->Src1.ID());
  const auto Src2 = GetRegPair(Op->Src2.ID());
  cmp(EmitSize, Src1.first, Src2.first);
  // The second halves are only compared if the first halves were equal;
  // otherwise all flags are forced clear, leaving Z = 0 (not equal).
  ccmp(EmitSize, Src1.second, Src2.second, ARMEmitter::StatusFlags::None, ARMEmitter::Condition::CC_EQ);

  // Restore NzCV, keeping the freshly computed Z.
  if (CTX->HostFeatures.SupportsFlagM) {
    // RMIF takes the new flag values from bits [3:0] of the source register
    // after rotating it right. The saved flags live in bits [31:28], so the
    // rotation must be 28; a rotation of 0 would write zeros into N, C and V
    // instead of restoring them. Mask 0xb selects N, C and V and leaves Z alone.
    // NOTE: instruction-count fixtures expecting `rmif x0, #0, #NzCV` need to be
    // regenerated to `rmif x0, #28, #NzCV`.
    rmif(TMP1, 28, 0xb /* NzCV */);
  } else {
    // Without FlagM: materialise the new Z as 0/1, splice it into the Z bit of
    // the saved flags word, then write the whole word back.
    cset(ARMEmitter::Size::i32Bit, TMP2, ARMEmitter::Condition::CC_EQ);
    bfi(ARMEmitter::Size::i32Bit, TMP1, TMP2, 30 /* lsb: Z */, 1);
    msr(ARMEmitter::SystemRegister::NZCV, TMP1);
  }
}
|
||||
|
||||
DEF_OP(CarryInvert) {
|
||||
LOGMAN_THROW_A_FMT(CTX->HostFeatures.SupportsFlagM, "Unsupported flagm op");
|
||||
cfinv();
|
||||
|
@ -31,11 +31,19 @@ DEF_OP(CASPair) {
|
||||
mov(EmitSize, Dst.second, TMP4.R());
|
||||
}
|
||||
else {
|
||||
// Save NZCV so we don't have to mark this op as clobbering NZCV (the
|
||||
// SupportsAtomics path does not clobber NZCV and this !SupportsAtomics path
|
||||
// is so slow it's not worth the complexity of splitting the IR op.). We
|
||||
// clobber NZCV inside the hot loop and we can't replace cmp/ccmp/b.ne with
|
||||
// something NZCV-preserving without requiring an extra instruction.
|
||||
mrs(TMP1, ARMEmitter::SystemRegister::NZCV);
|
||||
|
||||
ARMEmitter::BackwardLabel LoopTop;
|
||||
ARMEmitter::SingleUseForwardLabel LoopNotExpected;
|
||||
ARMEmitter::SingleUseForwardLabel LoopExpected;
|
||||
Bind(&LoopTop);
|
||||
|
||||
// This instruction sequence must be synced with HandleCASPAL_Armv8.
|
||||
ldaxp(EmitSize, TMP2, TMP3, MemSrc);
|
||||
cmp(EmitSize, TMP2, Expected.first);
|
||||
ccmp(EmitSize, TMP3, Expected.second, ARMEmitter::StatusFlags::None, ARMEmitter::Condition::CC_EQ);
|
||||
@ -54,6 +62,9 @@ DEF_OP(CASPair) {
|
||||
// Might have hit the case where ldaxr was hit but stlxr wasn't
|
||||
clrex();
|
||||
Bind(&LoopExpected);
|
||||
|
||||
// Restore
|
||||
msr(ARMEmitter::SystemRegister::NZCV, TMP1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4258,16 +4258,8 @@ void OpDispatchBuilder::CMPXCHGPairOp(OpcodeArgs) {
|
||||
OrderedNode *Result_Lower = _ExtractElementPair(IR::SizeToOpSize(Size), CASResult, 0);
|
||||
OrderedNode *Result_Upper = _ExtractElementPair(IR::SizeToOpSize(Size), CASResult, 1);
|
||||
|
||||
// Set ZF if memory result was expected
|
||||
auto OneConst = _Constant(1);
|
||||
auto ZeroConst = _Constant(0);
|
||||
|
||||
OrderedNode *ZFResult = _Select(FEXCore::IR::COND_EQ,
|
||||
CASResult, Expected,
|
||||
OneConst, ZeroConst);
|
||||
|
||||
// Set ZF
|
||||
SetRFLAG<FEXCore::X86State::RFLAG_ZF_RAW_LOC>(ZFResult);
|
||||
HandleNZCV_RMW();
|
||||
_CmpPairZ(IR::SizeToOpSize(Size), CASResult, Expected);
|
||||
CalculateDeferredFlags();
|
||||
|
||||
auto UpdateIfNotZF = [this](auto Reg, auto Value) {
|
||||
|
@ -642,7 +642,6 @@
|
||||
],
|
||||
"HasDest": true,
|
||||
"DestSize": "Size",
|
||||
"ImplicitFlagClobber": true,
|
||||
"NumElements": "2",
|
||||
"EmitValidation": [
|
||||
"Size == FEXCore::IR::OpSize::i64Bit || Size == FEXCore::IR::OpSize::i128Bit"
|
||||
@ -1095,6 +1094,11 @@
|
||||
"Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
|
||||
]
|
||||
},
|
||||
"CmpPairZ OpSize:#Size, GPRPair:$Src1, GPRPair:$Src2": {
|
||||
"Desc": ["Compares register pairs and sets Z accordingly, preserving N/C/V.",
|
||||
"This accelerates cmpxchg."],
|
||||
"HasSideEffects": true
|
||||
},
|
||||
"SubNZCV OpSize:#Size, GPR:$Src1, GPR:$Src2": {
|
||||
"Desc": ["Set NZCV for the difference of two GPRs. ",
|
||||
"Carry flag uses arm64 definition, inverted x86.",
|
||||
|
@ -176,6 +176,12 @@ DeadFlagCalculationEliminination::Classify(IROp_Header *IROp)
|
||||
.CanEliminate = true,
|
||||
};
|
||||
|
||||
case OP_CMPPAIRZ:
|
||||
return {
|
||||
.Write = FLAG_Z,
|
||||
.CanEliminate = true,
|
||||
};
|
||||
|
||||
case OP_CARRYINVERT:
|
||||
return {
|
||||
.Read = FLAG_C,
|
||||
|
@ -284,6 +284,38 @@
|
||||
"mov x26, x5",
|
||||
"cmn wzr, w26, lsl #24"
|
||||
]
|
||||
},
|
||||
"Dead cmpxchg flags": {
|
||||
"ExpectedInstructionCount": 23,
|
||||
"x86Insts": [
|
||||
"cmpxchg8b [rbp]",
|
||||
"test rax, rax"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"add x20, x9, #0x0 (0)",
|
||||
"mov w21, w4",
|
||||
"mov w22, w6",
|
||||
"mov w23, w22",
|
||||
"mov w22, w21",
|
||||
"mov w21, w7",
|
||||
"mov w24, w5",
|
||||
"mov w25, w24",
|
||||
"mov w24, w21",
|
||||
"mov w2, w22",
|
||||
"mov w3, w23",
|
||||
"caspal w2, w3, w24, w25, [x20]",
|
||||
"mov w20, w2",
|
||||
"mov w21, w3",
|
||||
"mov w24, w20",
|
||||
"mov w25, w21",
|
||||
"mrs x0, nzcv",
|
||||
"cmp w20, w22",
|
||||
"ccmp w21, w23, #nzcv, eq",
|
||||
"rmif x0, #0, #NzCV",
|
||||
"csel x4, x24, x4, ne",
|
||||
"csel x6, x25, x6, ne",
|
||||
"ands x26, x4, x4"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -80,7 +80,7 @@
|
||||
]
|
||||
},
|
||||
"dxvk hotblock from MGRR": {
|
||||
"ExpectedInstructionCount": 42,
|
||||
"ExpectedInstructionCount": 40,
|
||||
"Comment": [
|
||||
"Hottest block in Metal Gear Rising: Revengeance render thread"
|
||||
],
|
||||
@ -128,21 +128,19 @@
|
||||
"mov w23, w6",
|
||||
"mov w24, w21",
|
||||
"mov w25, w5",
|
||||
"mrs x21, nzcv",
|
||||
"mov w2, w22",
|
||||
"mov w3, w23",
|
||||
"caspal w2, w3, w24, w25, [x20]",
|
||||
"mov w24, w2",
|
||||
"mov w25, w3",
|
||||
"mov w20, w24",
|
||||
"mov w12, w25",
|
||||
"cmp x24, x22",
|
||||
"ccmp x25, x23, #nzcv, eq",
|
||||
"cset x22, eq",
|
||||
"msr nzcv, x21",
|
||||
"rmif x22, #62, #nZcv",
|
||||
"csel x4, x20, x4, ne",
|
||||
"csel x6, x12, x6, ne"
|
||||
"mov w20, w2",
|
||||
"mov w21, w3",
|
||||
"mov w24, w20",
|
||||
"mov w25, w21",
|
||||
"mrs x0, nzcv",
|
||||
"cmp w20, w22",
|
||||
"ccmp w21, w23, #nzcv, eq",
|
||||
"rmif x0, #0, #NzCV",
|
||||
"csel x4, x24, x4, ne",
|
||||
"csel x6, x25, x6, ne"
|
||||
]
|
||||
},
|
||||
"Psychonauts matrix swizzle": {
|
||||
|
@ -644,7 +644,7 @@
|
||||
]
|
||||
},
|
||||
"cmpxchg8b [rbp]": {
|
||||
"ExpectedInstructionCount": 24,
|
||||
"ExpectedInstructionCount": 22,
|
||||
"Comment": "GROUP9 0x0F 0xC7 /1",
|
||||
"ExpectedArm64ASM": [
|
||||
"add x20, x9, #0x0 (0)",
|
||||
@ -656,25 +656,23 @@
|
||||
"mov w24, w5",
|
||||
"mov w25, w24",
|
||||
"mov w24, w21",
|
||||
"mrs x21, nzcv",
|
||||
"mov w2, w22",
|
||||
"mov w3, w23",
|
||||
"caspal w2, w3, w24, w25, [x20]",
|
||||
"mov w24, w2",
|
||||
"mov w25, w3",
|
||||
"mov w20, w24",
|
||||
"mov w30, w25",
|
||||
"cmp x24, x22",
|
||||
"ccmp x25, x23, #nzcv, eq",
|
||||
"cset x22, eq",
|
||||
"msr nzcv, x21",
|
||||
"rmif x22, #62, #nZcv",
|
||||
"csel x4, x20, x4, ne",
|
||||
"csel x6, x30, x6, ne"
|
||||
"mov w20, w2",
|
||||
"mov w21, w3",
|
||||
"mov w24, w20",
|
||||
"mov w25, w21",
|
||||
"mrs x0, nzcv",
|
||||
"cmp w20, w22",
|
||||
"ccmp w21, w23, #nzcv, eq",
|
||||
"rmif x0, #0, #NzCV",
|
||||
"csel x4, x24, x4, ne",
|
||||
"csel x6, x25, x6, ne"
|
||||
]
|
||||
},
|
||||
"cmpxchg16b [rbp]": {
|
||||
"ExpectedInstructionCount": 20,
|
||||
"ExpectedInstructionCount": 18,
|
||||
"Comment": "GROUP9 0x0F 0xC7 /1",
|
||||
"ExpectedArm64ASM": [
|
||||
"add x20, x9, #0x0 (0)",
|
||||
@ -682,21 +680,19 @@
|
||||
"mov x23, x6",
|
||||
"mov x24, x7",
|
||||
"mov x25, x5",
|
||||
"mrs x21, nzcv",
|
||||
"mov x2, x22",
|
||||
"mov x3, x23",
|
||||
"caspal x2, x3, x24, x25, [x20]",
|
||||
"mov x24, x2",
|
||||
"mov x25, x3",
|
||||
"mov x20, x24",
|
||||
"mov x30, x25",
|
||||
"cmp x24, x22",
|
||||
"ccmp x25, x23, #nzcv, eq",
|
||||
"cset x22, eq",
|
||||
"msr nzcv, x21",
|
||||
"rmif x22, #62, #nZcv",
|
||||
"csel x4, x20, x4, ne",
|
||||
"csel x6, x30, x6, ne"
|
||||
"mov x20, x2",
|
||||
"mov x21, x3",
|
||||
"mov x24, x20",
|
||||
"mov x25, x21",
|
||||
"mrs x0, nzcv",
|
||||
"cmp w20, w22",
|
||||
"ccmp w21, w23, #nzcv, eq",
|
||||
"rmif x0, #0, #NzCV",
|
||||
"csel x4, x24, x4, ne",
|
||||
"csel x6, x25, x6, ne"
|
||||
]
|
||||
},
|
||||
"rdrand ax": {
|
||||
|
@ -788,21 +788,21 @@
|
||||
"mov w24, w5",
|
||||
"mov w25, w24",
|
||||
"mov w24, w21",
|
||||
"mrs x21, nzcv",
|
||||
"mov w2, w22",
|
||||
"mov w3, w23",
|
||||
"caspal w2, w3, w24, w25, [x20]",
|
||||
"mov w24, w2",
|
||||
"mov w25, w3",
|
||||
"mov w20, w24",
|
||||
"mov w30, w25",
|
||||
"cmp x24, x22",
|
||||
"ccmp x25, x23, #nzcv, eq",
|
||||
"cset x22, eq",
|
||||
"bfi w21, w22, #30, #1",
|
||||
"msr nzcv, x21",
|
||||
"csel x4, x20, x4, ne",
|
||||
"csel x6, x30, x6, ne"
|
||||
"mov w20, w2",
|
||||
"mov w21, w3",
|
||||
"mov w24, w20",
|
||||
"mov w25, w21",
|
||||
"mrs x0, nzcv",
|
||||
"cmp w20, w22",
|
||||
"ccmp w21, w23, #nzcv, eq",
|
||||
"cset w1, eq",
|
||||
"bfi w0, w1, #30, #1",
|
||||
"msr nzcv, x0",
|
||||
"csel x4, x24, x4, ne",
|
||||
"csel x6, x25, x6, ne"
|
||||
]
|
||||
},
|
||||
"cmpxchg16b [rbp]": {
|
||||
@ -814,21 +814,21 @@
|
||||
"mov x23, x6",
|
||||
"mov x24, x7",
|
||||
"mov x25, x5",
|
||||
"mrs x21, nzcv",
|
||||
"mov x2, x22",
|
||||
"mov x3, x23",
|
||||
"caspal x2, x3, x24, x25, [x20]",
|
||||
"mov x24, x2",
|
||||
"mov x25, x3",
|
||||
"mov x20, x24",
|
||||
"mov x30, x25",
|
||||
"cmp x24, x22",
|
||||
"ccmp x25, x23, #nzcv, eq",
|
||||
"cset x22, eq",
|
||||
"bfi w21, w22, #30, #1",
|
||||
"msr nzcv, x21",
|
||||
"csel x4, x20, x4, ne",
|
||||
"csel x6, x30, x6, ne"
|
||||
"mov x20, x2",
|
||||
"mov x21, x3",
|
||||
"mov x24, x20",
|
||||
"mov x25, x21",
|
||||
"mrs x0, nzcv",
|
||||
"cmp w20, w22",
|
||||
"ccmp w21, w23, #nzcv, eq",
|
||||
"cset w1, eq",
|
||||
"bfi w0, w1, #30, #1",
|
||||
"msr nzcv, x0",
|
||||
"csel x4, x24, x4, ne",
|
||||
"csel x6, x25, x6, ne"
|
||||
]
|
||||
},
|
||||
"rdrand ax": {
|
||||
|
Loading…
Reference in New Issue
Block a user