mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-12-15 01:49:00 +00:00
Merge pull request #3528 from alyssarosenzweig/ra/xsave-xrstor
Eliminate crossblock liveness in xsave/xrstor
This commit is contained in:
commit
d25ace43aa
@ -809,6 +809,7 @@ public:
|
||||
void FXSaveOp(OpcodeArgs);
|
||||
void FXRStoreOp(OpcodeArgs);
|
||||
|
||||
OrderedNode *XSaveBase(X86Tables::DecodedOp Op);
|
||||
void XSaveOp(OpcodeArgs);
|
||||
|
||||
void PAlignrOp(OpcodeArgs);
|
||||
|
@ -3001,16 +3001,15 @@ void OpDispatchBuilder::XSaveOp(OpcodeArgs) {
|
||||
XSaveOpImpl(Op);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::XSaveOpImpl(OpcodeArgs) {
|
||||
const auto XSaveBase = [this, Op] {
|
||||
OrderedNode *Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
|
||||
return AppendSegmentOffset(Mem, Op->Flags);
|
||||
};
|
||||
OrderedNode *OpDispatchBuilder::XSaveBase(X86Tables::DecodedOp Op) {
|
||||
OrderedNode *Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
|
||||
return AppendSegmentOffset(Mem, Op->Flags);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::XSaveOpImpl(OpcodeArgs) {
|
||||
// NOTE: Mask should be EAX and EDX concatenated, but we only need to test
|
||||
// for features that are in the lower 32 bits, so EAX only is sufficient.
|
||||
OrderedNode *Mask = LoadGPRRegister(X86State::REG_RAX);
|
||||
OrderedNode *Base = XSaveBase();
|
||||
const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize());
|
||||
|
||||
const auto StoreIfFlagSet = [&](uint32_t BitIndex, auto fn, uint32_t FieldSize = 1){
|
||||
@ -3034,25 +3033,26 @@ void OpDispatchBuilder::XSaveOpImpl(OpcodeArgs) {
|
||||
|
||||
// x87
|
||||
{
|
||||
StoreIfFlagSet(0, [this, Op, Base] { SaveX87State(Op, Base); });
|
||||
StoreIfFlagSet(0, [this, Op] { SaveX87State(Op, XSaveBase(Op)); });
|
||||
}
|
||||
// SSE
|
||||
{
|
||||
StoreIfFlagSet(1, [this, Base] { SaveSSEState(Base); });
|
||||
StoreIfFlagSet(1, [this, Op] { SaveSSEState(XSaveBase(Op)); });
|
||||
}
|
||||
// AVX
|
||||
if (CTX->HostFeatures.SupportsAVX)
|
||||
{
|
||||
StoreIfFlagSet(2, [this, Base] { SaveAVXState(Base); });
|
||||
StoreIfFlagSet(2, [this, Op] { SaveAVXState(XSaveBase(Op)); });
|
||||
}
|
||||
|
||||
// We need to save MXCSR and MXCSR_MASK if either SSE or AVX are requested to be saved
|
||||
{
|
||||
StoreIfFlagSet(1, [this, Base] { SaveMXCSRState(Base); }, 2);
|
||||
StoreIfFlagSet(1, [this, Op] { SaveMXCSRState(XSaveBase(Op)); }, 2);
|
||||
}
|
||||
|
||||
// Update XSTATE_BV region of the XSAVE header
|
||||
{
|
||||
OrderedNode *Base = XSaveBase(Op);
|
||||
OrderedNode *HeaderOffset = _Add(OpSize, Base, _Constant(512));
|
||||
|
||||
// NOTE: We currently only support the first 3 bits (x87, SSE, and AVX)
|
||||
@ -3210,14 +3210,11 @@ void OpDispatchBuilder::FXRStoreOp(OpcodeArgs) {
|
||||
void OpDispatchBuilder::XRstorOpImpl(OpcodeArgs) {
|
||||
const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize());
|
||||
|
||||
const auto XSaveBase = [this, Op] {
|
||||
OrderedNode *Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
|
||||
return AppendSegmentOffset(Mem, Op->Flags);
|
||||
};
|
||||
|
||||
// Set up base address for the XSAVE region to restore from, and also read the
|
||||
// XSTATE_BV bit flags out of the XSTATE header.
|
||||
OrderedNode *Base = XSaveBase();
|
||||
//
|
||||
// Note: we rematerialize Base in each block to avoid crossblock liveness.
|
||||
OrderedNode *Base = XSaveBase(Op);
|
||||
OrderedNode *Mask = _LoadMem(GPRClass, 8, _Add(OpSize, Base, _Constant(512)), 8);
|
||||
|
||||
// If a bit in our XSTATE_BV is set, then we restore from that region of the XSAVE area,
|
||||
@ -3253,27 +3250,28 @@ void OpDispatchBuilder::XRstorOpImpl(OpcodeArgs) {
|
||||
// x87
|
||||
{
|
||||
RestoreIfFlagSetOrDefault(0,
|
||||
[this, Base] { RestoreX87State(Base); },
|
||||
[this, Op] { RestoreX87State(XSaveBase(Op)); },
|
||||
[this, Op] { DefaultX87State(Op); });
|
||||
}
|
||||
// SSE
|
||||
{
|
||||
RestoreIfFlagSetOrDefault(1,
|
||||
[this, Base] { RestoreSSEState(Base); },
|
||||
[this, Op] { RestoreSSEState(XSaveBase(Op)); },
|
||||
[this] { DefaultSSEState(); });
|
||||
}
|
||||
// AVX
|
||||
if (CTX->HostFeatures.SupportsAVX)
|
||||
{
|
||||
RestoreIfFlagSetOrDefault(2,
|
||||
[this, Base] { RestoreAVXState(Base); },
|
||||
[this, Op] { RestoreAVXState(XSaveBase(Op)); },
|
||||
[this] { DefaultAVXState(); });
|
||||
}
|
||||
|
||||
{
|
||||
// We need to restore the MXCSR if either SSE or AVX are requested to be saved
|
||||
RestoreIfFlagSetOrDefault(1,
|
||||
[this, Base, OpSize] {
|
||||
[this, Op, OpSize] {
|
||||
OrderedNode *Base = XSaveBase(Op);
|
||||
OrderedNode *MXCSRLocation = _Add(OpSize, Base, _Constant(24));
|
||||
OrderedNode *MXCSR = _LoadMem(GPRClass, 4, MXCSRLocation, 4);
|
||||
RestoreMXCSRState(MXCSR);
|
||||
|
@ -1407,80 +1407,79 @@
|
||||
]
|
||||
},
|
||||
"xsave [rax]": {
|
||||
"ExpectedInstructionCount": 71,
|
||||
"ExpectedInstructionCount": 70,
|
||||
"Comment": "GROUP15 0x0F 0xAE /4",
|
||||
"ExpectedArm64ASM": [
|
||||
"mov x20, x4",
|
||||
"mov x21, x4",
|
||||
"ubfx x22, x20, #0, #1",
|
||||
"cbnz x22, #+0x8",
|
||||
"ubfx x21, x20, #0, #1",
|
||||
"cbnz x21, #+0x8",
|
||||
"b #+0x84",
|
||||
"ldrh w22, [x28, #1024]",
|
||||
"strh w22, [x21]",
|
||||
"mov w22, #0x0",
|
||||
"ldrb w23, [x28, #747]",
|
||||
"bfi x22, x23, #11, #3",
|
||||
"ldrb w23, [x28, #744]",
|
||||
"ldrb w24, [x28, #745]",
|
||||
"ldrb w25, [x28, #746]",
|
||||
"ldrb w30, [x28, #750]",
|
||||
"orr x22, x22, x23, lsl #8",
|
||||
"orr x22, x22, x24, lsl #9",
|
||||
"orr x22, x22, x25, lsl #10",
|
||||
"orr x22, x22, x30, lsl #14",
|
||||
"strh w22, [x21, #2]",
|
||||
"ldrb w22, [x28, #1026]",
|
||||
"strb w22, [x21, #4]",
|
||||
"ldrh w21, [x28, #1024]",
|
||||
"strh w21, [x4]",
|
||||
"mov w21, #0x0",
|
||||
"ldrb w22, [x28, #747]",
|
||||
"bfi x21, x22, #11, #3",
|
||||
"ldrb w22, [x28, #744]",
|
||||
"ldrb w23, [x28, #745]",
|
||||
"ldrb w24, [x28, #746]",
|
||||
"ldrb w25, [x28, #750]",
|
||||
"orr x21, x21, x22, lsl #8",
|
||||
"orr x21, x21, x23, lsl #9",
|
||||
"orr x21, x21, x24, lsl #10",
|
||||
"orr x21, x21, x25, lsl #14",
|
||||
"strh w21, [x4, #2]",
|
||||
"ldrb w21, [x28, #1026]",
|
||||
"strb w21, [x4, #4]",
|
||||
"ldr q2, [x28, #768]",
|
||||
"str q2, [x21, #32]",
|
||||
"str q2, [x4, #32]",
|
||||
"ldr q2, [x28, #784]",
|
||||
"str q2, [x21, #48]",
|
||||
"str q2, [x4, #48]",
|
||||
"ldr q2, [x28, #800]",
|
||||
"str q2, [x21, #64]",
|
||||
"str q2, [x4, #64]",
|
||||
"ldr q2, [x28, #816]",
|
||||
"str q2, [x21, #80]",
|
||||
"str q2, [x4, #80]",
|
||||
"ldr q2, [x28, #832]",
|
||||
"str q2, [x21, #96]",
|
||||
"str q2, [x4, #96]",
|
||||
"ldr q2, [x28, #848]",
|
||||
"str q2, [x21, #112]",
|
||||
"str q2, [x4, #112]",
|
||||
"ldr q2, [x28, #864]",
|
||||
"str q2, [x21, #128]",
|
||||
"str q2, [x4, #128]",
|
||||
"ldr q2, [x28, #880]",
|
||||
"str q2, [x21, #144]",
|
||||
"ubfx x22, x20, #1, #1",
|
||||
"cbnz x22, #+0x8",
|
||||
"str q2, [x4, #144]",
|
||||
"ubfx x21, x20, #1, #1",
|
||||
"cbnz x21, #+0x8",
|
||||
"b #+0x44",
|
||||
"str q16, [x21, #160]",
|
||||
"str q17, [x21, #176]",
|
||||
"str q18, [x21, #192]",
|
||||
"str q19, [x21, #208]",
|
||||
"str q20, [x21, #224]",
|
||||
"str q21, [x21, #240]",
|
||||
"str q22, [x21, #256]",
|
||||
"str q23, [x21, #272]",
|
||||
"str q24, [x21, #288]",
|
||||
"str q25, [x21, #304]",
|
||||
"str q26, [x21, #320]",
|
||||
"str q27, [x21, #336]",
|
||||
"str q28, [x21, #352]",
|
||||
"str q29, [x21, #368]",
|
||||
"str q30, [x21, #384]",
|
||||
"str q31, [x21, #400]",
|
||||
"ubfx x22, x20, #1, #2",
|
||||
"cbnz x22, #+0x8",
|
||||
"str q16, [x4, #160]",
|
||||
"str q17, [x4, #176]",
|
||||
"str q18, [x4, #192]",
|
||||
"str q19, [x4, #208]",
|
||||
"str q20, [x4, #224]",
|
||||
"str q21, [x4, #240]",
|
||||
"str q22, [x4, #256]",
|
||||
"str q23, [x4, #272]",
|
||||
"str q24, [x4, #288]",
|
||||
"str q25, [x4, #304]",
|
||||
"str q26, [x4, #320]",
|
||||
"str q27, [x4, #336]",
|
||||
"str q28, [x4, #352]",
|
||||
"str q29, [x4, #368]",
|
||||
"str q30, [x4, #384]",
|
||||
"str q31, [x4, #400]",
|
||||
"ubfx x21, x20, #1, #2",
|
||||
"cbnz x21, #+0x8",
|
||||
"b #+0x2c",
|
||||
"mov w22, #0x1f80",
|
||||
"mrs x23, fpcr",
|
||||
"ubfx x23, x23, #22, #3",
|
||||
"rbit w0, w23",
|
||||
"bfi x23, x0, #30, #2",
|
||||
"bfi w22, w23, #13, #3",
|
||||
"add x23, x21, #0x18 (24)",
|
||||
"str w22, [x21, #24]",
|
||||
"mov w22, #0xffff",
|
||||
"str w22, [x23, #4]",
|
||||
"mov w21, #0x1f80",
|
||||
"mrs x22, fpcr",
|
||||
"ubfx x22, x22, #22, #3",
|
||||
"rbit w0, w22",
|
||||
"bfi x22, x0, #30, #2",
|
||||
"bfi w21, w22, #13, #3",
|
||||
"add x22, x4, #0x18 (24)",
|
||||
"str w21, [x4, #24]",
|
||||
"mov w21, #0xffff",
|
||||
"str w21, [x22, #4]",
|
||||
"ubfx x20, x20, #0, #3",
|
||||
"str x20, [x21, #512]"
|
||||
"str x20, [x4, #512]"
|
||||
]
|
||||
},
|
||||
"lfence": {
|
||||
@ -1491,55 +1490,54 @@
|
||||
]
|
||||
},
|
||||
"xrstor [rax]": {
|
||||
"ExpectedInstructionCount": 104,
|
||||
"ExpectedInstructionCount": 103,
|
||||
"Comment": "GROUP15 0x0F 0xAE /5",
|
||||
"ExpectedArm64ASM": [
|
||||
"mov x20, x4",
|
||||
"ldr x21, [x20, #512]",
|
||||
"ubfx x22, x21, #0, #1",
|
||||
"cbnz x22, #+0x8",
|
||||
"ldr x20, [x4, #512]",
|
||||
"ubfx x21, x20, #0, #1",
|
||||
"cbnz x21, #+0x8",
|
||||
"b #+0x84",
|
||||
"ldrh w22, [x20]",
|
||||
"strh w22, [x28, #1024]",
|
||||
"ldrh w22, [x20, #2]",
|
||||
"ubfx w23, w22, #11, #3",
|
||||
"strb w23, [x28, #747]",
|
||||
"ubfx w23, w22, #8, #1",
|
||||
"ubfx w24, w22, #9, #1",
|
||||
"ubfx w25, w22, #10, #1",
|
||||
"ubfx w22, w22, #14, #1",
|
||||
"strb w23, [x28, #744]",
|
||||
"strb w24, [x28, #745]",
|
||||
"strb w25, [x28, #746]",
|
||||
"strb w22, [x28, #750]",
|
||||
"ldrb w22, [x20, #4]",
|
||||
"strb w22, [x28, #1026]",
|
||||
"ldr q2, [x20, #32]",
|
||||
"ldrh w21, [x4]",
|
||||
"strh w21, [x28, #1024]",
|
||||
"ldrh w21, [x4, #2]",
|
||||
"ubfx w22, w21, #11, #3",
|
||||
"strb w22, [x28, #747]",
|
||||
"ubfx w22, w21, #8, #1",
|
||||
"ubfx w23, w21, #9, #1",
|
||||
"ubfx w24, w21, #10, #1",
|
||||
"ubfx w21, w21, #14, #1",
|
||||
"strb w22, [x28, #744]",
|
||||
"strb w23, [x28, #745]",
|
||||
"strb w24, [x28, #746]",
|
||||
"strb w21, [x28, #750]",
|
||||
"ldrb w21, [x4, #4]",
|
||||
"strb w21, [x28, #1026]",
|
||||
"ldr q2, [x4, #32]",
|
||||
"str q2, [x28, #768]",
|
||||
"ldr q2, [x20, #48]",
|
||||
"ldr q2, [x4, #48]",
|
||||
"str q2, [x28, #784]",
|
||||
"ldr q2, [x20, #64]",
|
||||
"ldr q2, [x4, #64]",
|
||||
"str q2, [x28, #800]",
|
||||
"ldr q2, [x20, #80]",
|
||||
"ldr q2, [x4, #80]",
|
||||
"str q2, [x28, #816]",
|
||||
"ldr q2, [x20, #96]",
|
||||
"ldr q2, [x4, #96]",
|
||||
"str q2, [x28, #832]",
|
||||
"ldr q2, [x20, #112]",
|
||||
"ldr q2, [x4, #112]",
|
||||
"str q2, [x28, #848]",
|
||||
"ldr q2, [x20, #128]",
|
||||
"ldr q2, [x4, #128]",
|
||||
"str q2, [x28, #864]",
|
||||
"ldr q2, [x20, #144]",
|
||||
"ldr q2, [x4, #144]",
|
||||
"str q2, [x28, #880]",
|
||||
"b #+0x4c",
|
||||
"mov w22, #0x0",
|
||||
"mov w23, #0x37f",
|
||||
"strh w23, [x28, #1024]",
|
||||
"strb w22, [x28, #747]",
|
||||
"strb w22, [x28, #744]",
|
||||
"strb w22, [x28, #745]",
|
||||
"strb w22, [x28, #746]",
|
||||
"strb w22, [x28, #750]",
|
||||
"strb w22, [x28, #1026]",
|
||||
"mov w21, #0x0",
|
||||
"mov w22, #0x37f",
|
||||
"strh w22, [x28, #1024]",
|
||||
"strb w21, [x28, #747]",
|
||||
"strb w21, [x28, #744]",
|
||||
"strb w21, [x28, #745]",
|
||||
"strb w21, [x28, #746]",
|
||||
"strb w21, [x28, #750]",
|
||||
"strb w21, [x28, #1026]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #768]",
|
||||
"str q2, [x28, #784]",
|
||||
@ -1549,25 +1547,25 @@
|
||||
"str q2, [x28, #848]",
|
||||
"str q2, [x28, #864]",
|
||||
"str q2, [x28, #880]",
|
||||
"ubfx x22, x21, #1, #1",
|
||||
"cbnz x22, #+0x8",
|
||||
"ubfx x21, x20, #1, #1",
|
||||
"cbnz x21, #+0x8",
|
||||
"b #+0x48",
|
||||
"ldr q16, [x20, #160]",
|
||||
"ldr q17, [x20, #176]",
|
||||
"ldr q18, [x20, #192]",
|
||||
"ldr q19, [x20, #208]",
|
||||
"ldr q20, [x20, #224]",
|
||||
"ldr q21, [x20, #240]",
|
||||
"ldr q22, [x20, #256]",
|
||||
"ldr q23, [x20, #272]",
|
||||
"ldr q24, [x20, #288]",
|
||||
"ldr q25, [x20, #304]",
|
||||
"ldr q26, [x20, #320]",
|
||||
"ldr q27, [x20, #336]",
|
||||
"ldr q28, [x20, #352]",
|
||||
"ldr q29, [x20, #368]",
|
||||
"ldr q30, [x20, #384]",
|
||||
"ldr q31, [x20, #400]",
|
||||
"ldr q16, [x4, #160]",
|
||||
"ldr q17, [x4, #176]",
|
||||
"ldr q18, [x4, #192]",
|
||||
"ldr q19, [x4, #208]",
|
||||
"ldr q20, [x4, #224]",
|
||||
"ldr q21, [x4, #240]",
|
||||
"ldr q22, [x4, #256]",
|
||||
"ldr q23, [x4, #272]",
|
||||
"ldr q24, [x4, #288]",
|
||||
"ldr q25, [x4, #304]",
|
||||
"ldr q26, [x4, #320]",
|
||||
"ldr q27, [x4, #336]",
|
||||
"ldr q28, [x4, #352]",
|
||||
"ldr q29, [x4, #368]",
|
||||
"ldr q30, [x4, #384]",
|
||||
"ldr q31, [x4, #400]",
|
||||
"b #+0x44",
|
||||
"movi v16.2d, #0x0",
|
||||
"mov v17.16b, v16.16b",
|
||||
@ -1585,10 +1583,10 @@
|
||||
"mov v29.16b, v16.16b",
|
||||
"mov v30.16b, v16.16b",
|
||||
"mov v31.16b, v16.16b",
|
||||
"ubfx x21, x21, #1, #2",
|
||||
"cbnz x21, #+0x8",
|
||||
"ubfx x20, x20, #1, #2",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x2c",
|
||||
"ldr w20, [x20, #24]",
|
||||
"ldr w20, [x4, #24]",
|
||||
"ubfx w20, w20, #13, #3",
|
||||
"rbit w1, w20",
|
||||
"lsr w1, w1, #30",
|
||||
|
@ -1587,80 +1587,79 @@
|
||||
]
|
||||
},
|
||||
"xsave [rax]": {
|
||||
"ExpectedInstructionCount": 71,
|
||||
"ExpectedInstructionCount": 70,
|
||||
"Comment": "GROUP15 0x0F 0xAE /4",
|
||||
"ExpectedArm64ASM": [
|
||||
"mov x20, x4",
|
||||
"mov x21, x4",
|
||||
"ubfx x22, x20, #0, #1",
|
||||
"cbnz x22, #+0x8",
|
||||
"ubfx x21, x20, #0, #1",
|
||||
"cbnz x21, #+0x8",
|
||||
"b #+0x84",
|
||||
"ldrh w22, [x28, #1024]",
|
||||
"strh w22, [x21]",
|
||||
"mov w22, #0x0",
|
||||
"ldrb w23, [x28, #747]",
|
||||
"bfi x22, x23, #11, #3",
|
||||
"ldrb w23, [x28, #744]",
|
||||
"ldrb w24, [x28, #745]",
|
||||
"ldrb w25, [x28, #746]",
|
||||
"ldrb w30, [x28, #750]",
|
||||
"orr x22, x22, x23, lsl #8",
|
||||
"orr x22, x22, x24, lsl #9",
|
||||
"orr x22, x22, x25, lsl #10",
|
||||
"orr x22, x22, x30, lsl #14",
|
||||
"strh w22, [x21, #2]",
|
||||
"ldrb w22, [x28, #1026]",
|
||||
"strb w22, [x21, #4]",
|
||||
"ldrh w21, [x28, #1024]",
|
||||
"strh w21, [x4]",
|
||||
"mov w21, #0x0",
|
||||
"ldrb w22, [x28, #747]",
|
||||
"bfi x21, x22, #11, #3",
|
||||
"ldrb w22, [x28, #744]",
|
||||
"ldrb w23, [x28, #745]",
|
||||
"ldrb w24, [x28, #746]",
|
||||
"ldrb w25, [x28, #750]",
|
||||
"orr x21, x21, x22, lsl #8",
|
||||
"orr x21, x21, x23, lsl #9",
|
||||
"orr x21, x21, x24, lsl #10",
|
||||
"orr x21, x21, x25, lsl #14",
|
||||
"strh w21, [x4, #2]",
|
||||
"ldrb w21, [x28, #1026]",
|
||||
"strb w21, [x4, #4]",
|
||||
"ldr q2, [x28, #768]",
|
||||
"str q2, [x21, #32]",
|
||||
"str q2, [x4, #32]",
|
||||
"ldr q2, [x28, #784]",
|
||||
"str q2, [x21, #48]",
|
||||
"str q2, [x4, #48]",
|
||||
"ldr q2, [x28, #800]",
|
||||
"str q2, [x21, #64]",
|
||||
"str q2, [x4, #64]",
|
||||
"ldr q2, [x28, #816]",
|
||||
"str q2, [x21, #80]",
|
||||
"str q2, [x4, #80]",
|
||||
"ldr q2, [x28, #832]",
|
||||
"str q2, [x21, #96]",
|
||||
"str q2, [x4, #96]",
|
||||
"ldr q2, [x28, #848]",
|
||||
"str q2, [x21, #112]",
|
||||
"str q2, [x4, #112]",
|
||||
"ldr q2, [x28, #864]",
|
||||
"str q2, [x21, #128]",
|
||||
"str q2, [x4, #128]",
|
||||
"ldr q2, [x28, #880]",
|
||||
"str q2, [x21, #144]",
|
||||
"ubfx x22, x20, #1, #1",
|
||||
"cbnz x22, #+0x8",
|
||||
"str q2, [x4, #144]",
|
||||
"ubfx x21, x20, #1, #1",
|
||||
"cbnz x21, #+0x8",
|
||||
"b #+0x44",
|
||||
"str q16, [x21, #160]",
|
||||
"str q17, [x21, #176]",
|
||||
"str q18, [x21, #192]",
|
||||
"str q19, [x21, #208]",
|
||||
"str q20, [x21, #224]",
|
||||
"str q21, [x21, #240]",
|
||||
"str q22, [x21, #256]",
|
||||
"str q23, [x21, #272]",
|
||||
"str q24, [x21, #288]",
|
||||
"str q25, [x21, #304]",
|
||||
"str q26, [x21, #320]",
|
||||
"str q27, [x21, #336]",
|
||||
"str q28, [x21, #352]",
|
||||
"str q29, [x21, #368]",
|
||||
"str q30, [x21, #384]",
|
||||
"str q31, [x21, #400]",
|
||||
"ubfx x22, x20, #1, #2",
|
||||
"cbnz x22, #+0x8",
|
||||
"str q16, [x4, #160]",
|
||||
"str q17, [x4, #176]",
|
||||
"str q18, [x4, #192]",
|
||||
"str q19, [x4, #208]",
|
||||
"str q20, [x4, #224]",
|
||||
"str q21, [x4, #240]",
|
||||
"str q22, [x4, #256]",
|
||||
"str q23, [x4, #272]",
|
||||
"str q24, [x4, #288]",
|
||||
"str q25, [x4, #304]",
|
||||
"str q26, [x4, #320]",
|
||||
"str q27, [x4, #336]",
|
||||
"str q28, [x4, #352]",
|
||||
"str q29, [x4, #368]",
|
||||
"str q30, [x4, #384]",
|
||||
"str q31, [x4, #400]",
|
||||
"ubfx x21, x20, #1, #2",
|
||||
"cbnz x21, #+0x8",
|
||||
"b #+0x2c",
|
||||
"mov w22, #0x1f80",
|
||||
"mrs x23, fpcr",
|
||||
"ubfx x23, x23, #22, #3",
|
||||
"rbit w0, w23",
|
||||
"bfi x23, x0, #30, #2",
|
||||
"bfi w22, w23, #13, #3",
|
||||
"add x23, x21, #0x18 (24)",
|
||||
"str w22, [x21, #24]",
|
||||
"mov w22, #0xffff",
|
||||
"str w22, [x23, #4]",
|
||||
"mov w21, #0x1f80",
|
||||
"mrs x22, fpcr",
|
||||
"ubfx x22, x22, #22, #3",
|
||||
"rbit w0, w22",
|
||||
"bfi x22, x0, #30, #2",
|
||||
"bfi w21, w22, #13, #3",
|
||||
"add x22, x4, #0x18 (24)",
|
||||
"str w21, [x4, #24]",
|
||||
"mov w21, #0xffff",
|
||||
"str w21, [x22, #4]",
|
||||
"ubfx x20, x20, #0, #3",
|
||||
"str x20, [x21, #512]"
|
||||
"str x20, [x4, #512]"
|
||||
]
|
||||
},
|
||||
"lfence": {
|
||||
@ -1671,55 +1670,54 @@
|
||||
]
|
||||
},
|
||||
"xrstor [rax]": {
|
||||
"ExpectedInstructionCount": 104,
|
||||
"ExpectedInstructionCount": 103,
|
||||
"Comment": "GROUP15 0x0F 0xAE /5",
|
||||
"ExpectedArm64ASM": [
|
||||
"mov x20, x4",
|
||||
"ldr x21, [x20, #512]",
|
||||
"ubfx x22, x21, #0, #1",
|
||||
"cbnz x22, #+0x8",
|
||||
"ldr x20, [x4, #512]",
|
||||
"ubfx x21, x20, #0, #1",
|
||||
"cbnz x21, #+0x8",
|
||||
"b #+0x84",
|
||||
"ldrh w22, [x20]",
|
||||
"strh w22, [x28, #1024]",
|
||||
"ldrh w22, [x20, #2]",
|
||||
"ubfx w23, w22, #11, #3",
|
||||
"strb w23, [x28, #747]",
|
||||
"ubfx w23, w22, #8, #1",
|
||||
"ubfx w24, w22, #9, #1",
|
||||
"ubfx w25, w22, #10, #1",
|
||||
"ubfx w22, w22, #14, #1",
|
||||
"strb w23, [x28, #744]",
|
||||
"strb w24, [x28, #745]",
|
||||
"strb w25, [x28, #746]",
|
||||
"strb w22, [x28, #750]",
|
||||
"ldrb w22, [x20, #4]",
|
||||
"strb w22, [x28, #1026]",
|
||||
"ldr q2, [x20, #32]",
|
||||
"ldrh w21, [x4]",
|
||||
"strh w21, [x28, #1024]",
|
||||
"ldrh w21, [x4, #2]",
|
||||
"ubfx w22, w21, #11, #3",
|
||||
"strb w22, [x28, #747]",
|
||||
"ubfx w22, w21, #8, #1",
|
||||
"ubfx w23, w21, #9, #1",
|
||||
"ubfx w24, w21, #10, #1",
|
||||
"ubfx w21, w21, #14, #1",
|
||||
"strb w22, [x28, #744]",
|
||||
"strb w23, [x28, #745]",
|
||||
"strb w24, [x28, #746]",
|
||||
"strb w21, [x28, #750]",
|
||||
"ldrb w21, [x4, #4]",
|
||||
"strb w21, [x28, #1026]",
|
||||
"ldr q2, [x4, #32]",
|
||||
"str q2, [x28, #768]",
|
||||
"ldr q2, [x20, #48]",
|
||||
"ldr q2, [x4, #48]",
|
||||
"str q2, [x28, #784]",
|
||||
"ldr q2, [x20, #64]",
|
||||
"ldr q2, [x4, #64]",
|
||||
"str q2, [x28, #800]",
|
||||
"ldr q2, [x20, #80]",
|
||||
"ldr q2, [x4, #80]",
|
||||
"str q2, [x28, #816]",
|
||||
"ldr q2, [x20, #96]",
|
||||
"ldr q2, [x4, #96]",
|
||||
"str q2, [x28, #832]",
|
||||
"ldr q2, [x20, #112]",
|
||||
"ldr q2, [x4, #112]",
|
||||
"str q2, [x28, #848]",
|
||||
"ldr q2, [x20, #128]",
|
||||
"ldr q2, [x4, #128]",
|
||||
"str q2, [x28, #864]",
|
||||
"ldr q2, [x20, #144]",
|
||||
"ldr q2, [x4, #144]",
|
||||
"str q2, [x28, #880]",
|
||||
"b #+0x4c",
|
||||
"mov w22, #0x0",
|
||||
"mov w23, #0x37f",
|
||||
"strh w23, [x28, #1024]",
|
||||
"strb w22, [x28, #747]",
|
||||
"strb w22, [x28, #744]",
|
||||
"strb w22, [x28, #745]",
|
||||
"strb w22, [x28, #746]",
|
||||
"strb w22, [x28, #750]",
|
||||
"strb w22, [x28, #1026]",
|
||||
"mov w21, #0x0",
|
||||
"mov w22, #0x37f",
|
||||
"strh w22, [x28, #1024]",
|
||||
"strb w21, [x28, #747]",
|
||||
"strb w21, [x28, #744]",
|
||||
"strb w21, [x28, #745]",
|
||||
"strb w21, [x28, #746]",
|
||||
"strb w21, [x28, #750]",
|
||||
"strb w21, [x28, #1026]",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #768]",
|
||||
"str q2, [x28, #784]",
|
||||
@ -1729,25 +1727,25 @@
|
||||
"str q2, [x28, #848]",
|
||||
"str q2, [x28, #864]",
|
||||
"str q2, [x28, #880]",
|
||||
"ubfx x22, x21, #1, #1",
|
||||
"cbnz x22, #+0x8",
|
||||
"ubfx x21, x20, #1, #1",
|
||||
"cbnz x21, #+0x8",
|
||||
"b #+0x48",
|
||||
"ldr q16, [x20, #160]",
|
||||
"ldr q17, [x20, #176]",
|
||||
"ldr q18, [x20, #192]",
|
||||
"ldr q19, [x20, #208]",
|
||||
"ldr q20, [x20, #224]",
|
||||
"ldr q21, [x20, #240]",
|
||||
"ldr q22, [x20, #256]",
|
||||
"ldr q23, [x20, #272]",
|
||||
"ldr q24, [x20, #288]",
|
||||
"ldr q25, [x20, #304]",
|
||||
"ldr q26, [x20, #320]",
|
||||
"ldr q27, [x20, #336]",
|
||||
"ldr q28, [x20, #352]",
|
||||
"ldr q29, [x20, #368]",
|
||||
"ldr q30, [x20, #384]",
|
||||
"ldr q31, [x20, #400]",
|
||||
"ldr q16, [x4, #160]",
|
||||
"ldr q17, [x4, #176]",
|
||||
"ldr q18, [x4, #192]",
|
||||
"ldr q19, [x4, #208]",
|
||||
"ldr q20, [x4, #224]",
|
||||
"ldr q21, [x4, #240]",
|
||||
"ldr q22, [x4, #256]",
|
||||
"ldr q23, [x4, #272]",
|
||||
"ldr q24, [x4, #288]",
|
||||
"ldr q25, [x4, #304]",
|
||||
"ldr q26, [x4, #320]",
|
||||
"ldr q27, [x4, #336]",
|
||||
"ldr q28, [x4, #352]",
|
||||
"ldr q29, [x4, #368]",
|
||||
"ldr q30, [x4, #384]",
|
||||
"ldr q31, [x4, #400]",
|
||||
"b #+0x44",
|
||||
"movi v16.2d, #0x0",
|
||||
"mov v17.16b, v16.16b",
|
||||
@ -1765,10 +1763,10 @@
|
||||
"mov v29.16b, v16.16b",
|
||||
"mov v30.16b, v16.16b",
|
||||
"mov v31.16b, v16.16b",
|
||||
"ubfx x21, x21, #1, #2",
|
||||
"cbnz x21, #+0x8",
|
||||
"ubfx x20, x20, #1, #2",
|
||||
"cbnz x20, #+0x8",
|
||||
"b #+0x2c",
|
||||
"ldr w20, [x20, #24]",
|
||||
"ldr w20, [x4, #24]",
|
||||
"ubfx w20, w20, #13, #3",
|
||||
"rbit w1, w20",
|
||||
"lsr w1, w1, #30",
|
||||
|
Loading…
Reference in New Issue
Block a user