Merge pull request #3673 from alyssarosenzweig/ra/tied

Track tied sources in the IR
This commit is contained in:
Ryan Houdek 2024-05-30 10:55:15 -07:00 committed by GitHub
commit ee96d60983
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 569 additions and 680 deletions

View File

@ -55,6 +55,7 @@ class OpDefinition:
DynamicDispatch: bool
JITDispatch: bool
JITDispatchOverride: str
TiedSource: int
Arguments: list
EmitValidation: list
Desc: list
@ -77,6 +78,7 @@ class OpDefinition:
self.DynamicDispatch = False
self.JITDispatch = True
self.JITDispatchOverride = None
self.TiedSource = -1
self.Arguments = []
self.EmitValidation = []
self.Desc = []
@ -248,6 +250,9 @@ def parse_ops(ops):
if "JITDispatchOverride" in op_val:
OpDef.JITDispatchOverride = op_val["JITDispatchOverride"]
if "TiedSource" in op_val:
OpDef.TiedSource = op_val["TiedSource"]
# Do some fixups of the data here
if len(OpDef.EmitValidation) != 0:
for i in range(len(OpDef.EmitValidation)):
@ -372,13 +377,30 @@ def print_ir_sizes():
output_file.write("[[maybe_unused, nodiscard]] static size_t GetSize(IROps Op) { return IRSizes[Op]; }\n\n")
output_file.write("[[nodiscard, gnu::const, gnu::visibility(\"default\")]] std::string_view const& GetName(IROps Op);\n")
output_file.write("[[nodiscard, gnu::const, gnu::visibility(\"default\")]] uint8_t GetArgs(IROps Op);\n")
output_file.write("[[nodiscard, gnu::const, gnu::visibility(\"default\")]] uint8_t GetRAArgs(IROps Op);\n")
output_file.write("[[nodiscard, gnu::const, gnu::visibility(\"default\")]] FEXCore::IR::RegisterClassType GetRegClass(IROps Op);\n\n")
output_file.write("[[nodiscard, gnu::const, gnu::visibility(\"default\")]] bool HasSideEffects(IROps Op);\n")
output_file.write("[[nodiscard, gnu::const, gnu::visibility(\"default\")]] bool ImplicitFlagClobber(IROps Op);\n")
output_file.write("[[nodiscard, gnu::const, gnu::visibility(\"default\")]] bool GetHasDest(IROps Op);\n")
output_file.write(
'[[nodiscard, gnu::const, gnu::visibility("default")]] std::string_view const& GetName(IROps Op);\n'
)
output_file.write(
'[[nodiscard, gnu::const, gnu::visibility("default")]] uint8_t GetArgs(IROps Op);\n'
)
output_file.write(
'[[nodiscard, gnu::const, gnu::visibility("default")]] uint8_t GetRAArgs(IROps Op);\n'
)
output_file.write(
'[[nodiscard, gnu::const, gnu::visibility("default")]] FEXCore::IR::RegisterClassType GetRegClass(IROps Op);\n\n'
)
output_file.write(
'[[nodiscard, gnu::const, gnu::visibility("default")]] bool HasSideEffects(IROps Op);\n'
)
output_file.write(
'[[nodiscard, gnu::const, gnu::visibility("default")]] bool ImplicitFlagClobber(IROps Op);\n'
)
output_file.write(
'[[nodiscard, gnu::const, gnu::visibility("default")]] bool GetHasDest(IROps Op);\n'
)
output_file.write(
'[[nodiscard, gnu::const, gnu::visibility("default")]] int8_t TiedSource(IROps Op);\n'
)
output_file.write("#undef IROP_SIZES\n")
output_file.write("#endif\n\n")
@ -471,15 +493,25 @@ def print_ir_getraargs():
def print_ir_hassideeffects():
output_file.write("#ifdef IROP_HASSIDEEFFECTS_IMPL\n")
for array, prop in [("SideEffects", "HasSideEffects"),
("ImplicitFlagClobbers", "ImplicitFlagClobber")]:
output_file.write(f"constexpr std::array<uint8_t, OP_LAST + 1> {array} = {{\n")
for array, prop, T in [
("SideEffects", "HasSideEffects", "bool"),
("ImplicitFlagClobbers", "ImplicitFlagClobber", "bool"),
("TiedSources", "TiedSource", "int8_t"),
]:
output_file.write(
f"constexpr std::array<{'uint8_t' if T == 'bool' else T}, OP_LAST + 1> {array} = {{\n"
)
for op in IROps:
output_file.write("\t{},\n".format(("true" if getattr(op, prop) else "false")))
if T == "bool":
output_file.write(
"\t{},\n".format(("true" if getattr(op, prop) else "false"))
)
else:
output_file.write(f"\t{getattr(op, prop)},\n")
output_file.write("};\n\n")
output_file.write(f"bool {prop}(IROps Op) {{\n")
output_file.write(f"{T} {prop}(IROps Op) {{\n")
output_file.write(f" return {array}[Op];\n")
output_file.write("}\n")

View File

@ -1748,12 +1748,6 @@ void OpDispatchBuilder::RotateOp(OpcodeArgs) {
auto Res = _Ror(OpSize, Dest, Left ? _Neg(OpSize, Src) : Src);
StoreResult(GPRClass, Op, Res, -1);
// Ends up faster overall if we don't have FlagM, slower if we do...
// If Shift != 1, OF is undefined so we choose to zero here.
if (!CTX->HostFeatures.SupportsFlagM) {
ZeroCV();
}
// Extract the last bit shifted in to CF
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Res, Left ? 0 : Size - 1, true);

View File

@ -1322,16 +1322,6 @@ private:
NZCVDirty = true;
}
void ZeroCV() {
// Get old NZCV before we mess with PossiblySetNZCVBits
auto OldNZCV = GetNZCV();
// Mask out the NZ bits, clearing CV. Even if the code sets CV after, this can end up faster
// by allowing orlshl to be used instead of bfi.
PossiblySetNZCVBits = (1u << IndexNZCV(FEXCore::X86State::RFLAG_SF_RAW_LOC)) | (1u << IndexNZCV(FEXCore::X86State::RFLAG_ZF_RAW_LOC));
SetNZCV(_And(OpSize::i32Bit, OldNZCV, _Constant(PossiblySetNZCVBits)));
}
void SetNZ_ZeroCV(unsigned SrcSize, OrderedNode* Res) {
HandleNZ00Write();
_TestNZ(IR::SizeToOpSize(SrcSize), Res, Res);

View File

@ -549,6 +549,7 @@
"Desc": ["Does a memory load to a single element of a vector.",
"Leaves the rest of the vector's data intact.",
"Matches arm64 ld1 semantics"],
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
@ -570,6 +571,7 @@
"The address is decremented by the value size.",
"The return value size is the size of the current operating mode"
],
"TiedSource": 1,
"HasSideEffects": true,
"DestSize": "Size"
},
@ -1196,6 +1198,7 @@
"Desc": ["Integer binary and"
],
"DestSize": "Size",
"TiedSource": 0,
"HasSideEffects": true
},
"GPR = Andn OpSize:#Size, GPR:$Src1, GPR:$Src2": {
@ -1336,6 +1339,7 @@
"The bitfield is copied in to Dest[(Width + lsb):lsb]"
],
"DestSize": "Size",
"TiedSource": 0,
"EmitValidation": [
"Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
]
@ -1347,6 +1351,7 @@
"The bitfield is copied in to Dest[Width:0]"
],
"DestSize": "Size",
"TiedSource": 0,
"EmitValidation": [
"Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
]
@ -1777,29 +1782,35 @@
"NumElements": "RegisterSize / ElementSize"
},
"FPR = VShlI u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, u8:$BitShift": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
"FPR = VUShrI u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, u8:$BitShift": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
"FPR = VUShraI u8:#RegisterSize, u8:#ElementSize, FPR:$DestVector, FPR:$Vector, u8:$BitShift": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
"FPR = VSShrI u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, u8:$BitShift": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
"FPR = VUShrNI u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, u8:$BitShift": {
"TiedSource": 0,
"Desc": "Unsigned shifts right each element and then narrows to the next lower element size",
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / (ElementSize >> 1)"
},
"FPR = VUShrNI2 u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper, u8:$BitShift": {
"TiedSource": 0,
"Desc": ["Unsigned shifts right each element and then narrows to the next lower element size",
"Inserts results in to the high elements of the first argument"
],
@ -1831,10 +1842,12 @@
"NumElements": "RegisterSize / (ElementSize << 1)"
},
"FPR = VSQXTN u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / (ElementSize >> 1)"
},
"FPR = VSQXTN2 u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / (ElementSize >> 1)"
},
@ -1862,6 +1875,7 @@
"Desc": ["Signed rounding shift right by immediate",
"Exactly matching Arm64 srshr semantics"
],
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
@ -1869,6 +1883,7 @@
"Desc": ["Signed saturating shift left by immediate",
"Exactly matching Arm64 sqshl semantics"
],
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
@ -2077,42 +2092,52 @@
"NumElements": "RegisterSize / (ElementSize << 1)"
},
"FPR = VUShl u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftVector, i1:$RangeCheck": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
"FPR = VUShr u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftVector, i1:$RangeCheck": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
"FPR = VSShr u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftVector, i1:$RangeCheck": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
"FPR = VUShlS u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
"FPR = VUShrS u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
"FPR = VSShrS u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
"FPR = VUShrSWide u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
"FPR = VSShrSWide u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
"FPR = VUShlSWide u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
"FPR = VInsElement u8:#RegisterSize, u8:#ElementSize, u8:$DestIdx, u8:$SrcIdx, FPR:$DestVector, FPR:$SrcVector": {
"TiedSource": 0,
"DestSize": "RegisterSize",
"NumElements": "RegisterSize / ElementSize"
},
@ -2199,6 +2224,7 @@
"Table is always treated as a 128bit register",
"Indices matches destination size. Either 64bit or 128bit"
],
"TiedSource": 0,
"DestSize": "RegisterSize"
},
"FPR = VBSL u8:#RegisterSize, FPR:$VectorMask, FPR:$VectorTrue, FPR:$VectorFalse": {

View File

@ -388,6 +388,18 @@ private:
}
}
// Try to handle tied registers. This can fail, in which case the JIT will insert moves.
if (int TiedIdx = IR::TiedSource(IROp->Op); TiedIdx >= 0) {
PhysicalRegister Reg = SSAToReg[IROp->Args[TiedIdx].ID().Value];
RegisterClass* Class = GetClass(Reg);
uint32_t RegBits = GetRegBits(Reg);
if (Reg.Class != GPRFixedClass && Reg.Class != FPRFixedClass && (Class->Available & RegBits) == RegBits) {
SetReg(CodeNode, Reg);
return;
}
}
RegisterClassType OrigClassType = GetRegClassFromNode(IR, IROp);
bool Pair = OrigClassType == GPRPairClass;
RegisterClassType ClassType = Pair ? GPRClass : OrigClassType;

View File

@ -1517,7 +1517,7 @@
]
},
"lock dec dword [rax]": {
"ExpectedInstructionCount": 9,
"ExpectedInstructionCount": 7,
"Comment": "GROUP4 0xfe /1",
"ExpectedArm64ASM": [
"mov w20, #0xffffffff",
@ -1525,14 +1525,12 @@
"cset w20, hs",
"subs w26, w27, #0x1 (1)",
"mrs x21, nzcv",
"mov w0, w21",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"msr nzcv, x20"
"bfi w21, w20, #29, #1",
"msr nzcv, x21"
]
},
"lock dec qword [rax]": {
"ExpectedInstructionCount": 9,
"ExpectedInstructionCount": 7,
"Comment": "GROUP4 0xfe /1",
"ExpectedArm64ASM": [
"mov x20, #0xffffffffffffffff",
@ -1540,10 +1538,8 @@
"cset w20, hs",
"subs x26, x27, #0x1 (1)",
"mrs x21, nzcv",
"mov w0, w21",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"msr nzcv, x20"
"bfi w21, w20, #29, #1",
"msr nzcv, x21"
]
},
"lock inc byte [rax]": {
@ -1577,7 +1573,7 @@
]
},
"lock inc dword [rax]": {
"ExpectedInstructionCount": 9,
"ExpectedInstructionCount": 7,
"Comment": "GROUP4 0xfe /0",
"ExpectedArm64ASM": [
"mov w20, #0x1",
@ -1585,14 +1581,12 @@
"cset w20, hs",
"adds w26, w27, #0x1 (1)",
"mrs x21, nzcv",
"mov w0, w21",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"msr nzcv, x20"
"bfi w21, w20, #29, #1",
"msr nzcv, x21"
]
},
"lock inc qword [rax]": {
"ExpectedInstructionCount": 9,
"ExpectedInstructionCount": 7,
"Comment": "GROUP4 0xfe /0",
"ExpectedArm64ASM": [
"mov w20, #0x1",
@ -1600,10 +1594,8 @@
"cset w20, hs",
"adds x26, x27, #0x1 (1)",
"mrs x21, nzcv",
"mov w0, w21",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"msr nzcv, x20"
"bfi w21, w20, #29, #1",
"msr nzcv, x21"
]
}
}

View File

@ -909,7 +909,7 @@
]
},
"rcl al, 2": {
"ExpectedInstructionCount": 20,
"ExpectedInstructionCount": 18,
"Comment": "GROUP2 0xC0 /2",
"ExpectedArm64ASM": [
"uxtb w20, w4",
@ -925,30 +925,26 @@
"bfi x21, x22, #36, #1",
"bfi x21, x20, #19, #8",
"bfi x21, x22, #27, #1",
"mov x0, x21",
"bfxil x0, x20, #0, #8",
"mov x20, x0",
"ror x21, x20, #62",
"bfxil x4, x21, #0, #8",
"ror x20, x20, #61",
"bfxil x21, x20, #0, #8",
"ror x20, x21, #62",
"bfxil x4, x20, #0, #8",
"ror x20, x21, #61",
"rmif x20, #63, #nzCv"
]
},
"rcr al, 2": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 9,
"Comment": "GROUP2 0xC0 /3",
"ExpectedArm64ASM": [
"cset w20, hs",
"uxtb w21, w4",
"mov x0, x21",
"bfi x0, x20, #8, #1",
"mov x20, x0",
"bfi x20, x20, #9, #9",
"bfi x20, x20, #18, #18",
"bfi x20, x20, #36, #9",
"lsr w21, w20, #2",
"bfxil x4, x21, #0, #8",
"rmif x20, #0, #nzCv"
"bfi x21, x20, #8, #1",
"bfi x21, x21, #9, #9",
"bfi x21, x21, #18, #18",
"bfi x21, x21, #36, #9",
"lsr w20, w21, #2",
"bfxil x4, x20, #0, #8",
"rmif x21, #0, #nzCv"
]
},
"shl al, 2": {
@ -1038,7 +1034,7 @@
]
},
"rcl ax, 2": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 14,
"Comment": "GROUP2 0xC1 /2",
"ExpectedArm64ASM": [
"uxth w20, w4",
@ -1050,12 +1046,10 @@
"bfi x21, x22, #46, #1",
"bfi x21, x20, #13, #16",
"bfi x21, x22, #29, #1",
"mov x0, x21",
"bfxil x0, x20, #0, #16",
"mov x20, x0",
"ror x21, x20, #62",
"bfxil x4, x21, #0, #16",
"ror x20, x20, #61",
"bfxil x21, x20, #0, #16",
"ror x20, x21, #62",
"bfxil x4, x20, #0, #16",
"ror x20, x21, #61",
"rmif x20, #63, #nzCv"
]
},
@ -1082,19 +1076,17 @@
]
},
"rcr ax, 2": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 8,
"Comment": "GROUP2 0xC1 /3",
"ExpectedArm64ASM": [
"cset w20, hs",
"uxth w21, w4",
"mov x0, x21",
"bfi x0, x20, #16, #1",
"mov x20, x0",
"bfi x20, x20, #17, #17",
"bfi x20, x20, #34, #17",
"lsr w21, w20, #2",
"bfxil x4, x21, #0, #16",
"rmif x20, #0, #nzCv"
"bfi x21, x20, #16, #1",
"bfi x21, x21, #17, #17",
"bfi x21, x21, #34, #17",
"lsr w20, w21, #2",
"bfxil x4, x20, #0, #16",
"rmif x21, #0, #nzCv"
]
},
"rcr eax, 2": {
@ -1573,11 +1565,11 @@
]
},
"rcl al, cl": {
"ExpectedInstructionCount": 28,
"ExpectedInstructionCount": 26,
"Comment": "GROUP2 0xd2 /2",
"ExpectedArm64ASM": [
"and w20, w5, #0x1f",
"cbz x20, #+0x6c",
"cbz x20, #+0x64",
"and w20, w5, #0x1f",
"uxtb w21, w4",
"mov w22, #0x0",
@ -1592,40 +1584,36 @@
"bfi x22, x23, #36, #1",
"bfi x22, x21, #19, #8",
"bfi x22, x23, #27, #1",
"mov x0, x22",
"bfxil x0, x21, #0, #8",
"mov x21, x0",
"neg w22, w20",
"ror x22, x21, x22",
"bfxil x4, x22, #0, #8",
"bfxil x22, x21, #0, #8",
"neg w21, w20",
"ror x21, x22, x21",
"bfxil x4, x21, #0, #8",
"mov w23, #0x3f",
"sub x20, x23, x20",
"ror x20, x21, x20",
"ror x20, x22, x20",
"rmif x20, #63, #nzCv",
"eor x20, x20, x22, lsr #7",
"eor x20, x20, x21, lsr #7",
"rmif x20, #0, #nzcV"
]
},
"rcr al, cl": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 15,
"Comment": "GROUP2 0xd2 /3",
"ExpectedArm64ASM": [
"and w20, w5, #0x1f",
"cbz x20, #+0x40",
"cbz x20, #+0x38",
"cset w20, hs",
"uxtb w21, w4",
"mov x0, x21",
"bfi x0, x20, #8, #1",
"mov x20, x0",
"bfi x20, x20, #9, #9",
"bfi x20, x20, #18, #18",
"bfi x20, x20, #36, #9",
"lsr w21, w20, w5",
"bfxil x4, x21, #0, #8",
"bfi x21, x20, #8, #1",
"bfi x21, x21, #9, #9",
"bfi x21, x21, #18, #18",
"bfi x21, x21, #36, #9",
"lsr w20, w21, w5",
"bfxil x4, x20, #0, #8",
"sub w22, w5, #0x1 (1)",
"lsr w20, w20, w22",
"rmif x20, #63, #nzCv",
"eor w20, w21, w21, lsr #1",
"lsr w21, w21, w22",
"rmif x21, #63, #nzCv",
"eor w20, w20, w20, lsr #1",
"rmif x20, #6, #nzcV"
]
},
@ -1760,11 +1748,11 @@
]
},
"rcl ax, cl": {
"ExpectedInstructionCount": 24,
"ExpectedInstructionCount": 22,
"Comment": "GROUP2 0xd3 /2",
"ExpectedArm64ASM": [
"and w20, w5, #0x1f",
"cbz x20, #+0x5c",
"cbz x20, #+0x54",
"and w20, w5, #0x1f",
"uxth w21, w4",
"mov w22, #0x0",
@ -1775,17 +1763,15 @@
"bfi x22, x23, #46, #1",
"bfi x22, x21, #13, #16",
"bfi x22, x23, #29, #1",
"mov x0, x22",
"bfxil x0, x21, #0, #16",
"mov x21, x0",
"neg w22, w20",
"ror x22, x21, x22",
"bfxil x4, x22, #0, #16",
"bfxil x22, x21, #0, #16",
"neg w21, w20",
"ror x21, x22, x21",
"bfxil x4, x21, #0, #16",
"mov w23, #0x3f",
"sub x20, x23, x20",
"ror x20, x21, x20",
"ror x20, x22, x20",
"rmif x20, #63, #nzCv",
"eor x20, x20, x22, lsr #15",
"eor x20, x20, x21, lsr #15",
"rmif x20, #0, #nzcV"
]
},
@ -1830,24 +1816,22 @@
]
},
"rcr ax, cl": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 14,
"Comment": "GROUP2 0xd3 /3",
"ExpectedArm64ASM": [
"and w20, w5, #0x1f",
"cbz x20, #+0x3c",
"cbz x20, #+0x34",
"cset w20, hs",
"uxth w21, w4",
"mov x0, x21",
"bfi x0, x20, #16, #1",
"mov x20, x0",
"bfi x20, x20, #17, #17",
"bfi x20, x20, #34, #17",
"lsr w21, w20, w5",
"bfxil x4, x21, #0, #16",
"bfi x21, x20, #16, #1",
"bfi x21, x21, #17, #17",
"bfi x21, x21, #34, #17",
"lsr w20, w21, w5",
"bfxil x4, x20, #0, #16",
"sub w22, w5, #0x1 (1)",
"lsr w20, w20, w22",
"rmif x20, #63, #nzCv",
"eor w20, w21, w21, lsr #1",
"lsr w21, w21, w22",
"rmif x21, #63, #nzCv",
"eor w20, w20, w20, lsr #1",
"rmif x20, #14, #nzcV"
]
},
@ -2080,7 +2064,7 @@
]
},
"div bl": {
"ExpectedInstructionCount": 13,
"ExpectedInstructionCount": 11,
"Comment": "GROUP2 0xf6 /6",
"ExpectedArm64ASM": [
"uxtb w20, w7",
@ -2092,14 +2076,12 @@
"uxth w1, w20",
"udiv w2, w0, w1",
"msub w20, w2, w1, w0",
"mov x0, x22",
"bfi x0, x20, #8, #8",
"mov x20, x0",
"bfxil x4, x20, #0, #16"
"bfi x22, x20, #8, #8",
"bfxil x4, x22, #0, #16"
]
},
"idiv bl": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 9,
"Comment": "GROUP2 0xf6 /7",
"ExpectedArm64ASM": [
"uxtb w20, w7",
@ -2109,10 +2091,8 @@
"sdiv x22, x21, x20",
"sdiv x0, x21, x20",
"msub x20, x0, x20, x21",
"mov x0, x22",
"bfi x0, x20, #8, #8",
"mov x20, x0",
"bfxil x4, x20, #0, #16"
"bfi x22, x20, #8, #8",
"bfxil x4, x22, #0, #16"
]
},
"test bx, 1": {

View File

@ -656,7 +656,7 @@
]
},
"sha1msg2 xmm0, xmm1": {
"ExpectedInstructionCount": 15,
"ExpectedInstructionCount": 11,
"Comment": [
"0x66 0x0f 0x38 0xca"
],
@ -665,17 +665,13 @@
"ext v2.16b, v2.16b, v17.16b, #12",
"eor v2.16b, v16.16b, v2.16b",
"shl v3.4s, v2.4s, #1",
"mov v0.16b, v3.16b",
"usra v0.4s, v2.4s, #31",
"mov v2.16b, v0.16b",
"dup v3.4s, v2.s[3]",
"eor v3.16b, v16.16b, v3.16b",
"shl v4.4s, v3.4s, #1",
"mov v0.16b, v4.16b",
"usra v0.4s, v3.4s, #31",
"mov v3.16b, v0.16b",
"mov v16.16b, v2.16b",
"mov v16.s[0], v3.s[0]"
"usra v3.4s, v2.4s, #31",
"dup v2.4s, v3.s[3]",
"eor v2.16b, v16.16b, v2.16b",
"shl v4.4s, v2.4s, #1",
"usra v4.4s, v2.4s, #31",
"mov v16.16b, v3.16b",
"mov v16.s[0], v4.s[0]"
]
},
"sha256rnds2 xmm0, xmm1": {
@ -861,7 +857,7 @@
]
},
"adcx eax, ebx": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 14,
"Comment": [
"0x66 0x0f 0x38 0xf6"
],
@ -878,14 +874,12 @@
"cset x21, ls",
"cmp x20, #0x1 (1)",
"csel x20, x21, x23, eq",
"mov w0, w22",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"msr nzcv, x20"
"bfi w22, w20, #29, #1",
"msr nzcv, x22"
]
},
"adcx rax, rbx": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 12,
"Comment": [
"0x66 REX.W 0x0f 0x38 0xf6"
],
@ -900,14 +894,12 @@
"cset x23, ls",
"cmp x20, #0x1 (1)",
"csel x20, x23, x22, eq",
"mov w0, w21",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"msr nzcv, x20"
"bfi w21, w20, #29, #1",
"msr nzcv, x21"
]
},
"adox eax, ebx": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 14,
"Comment": [
"0xf3 0x0f 0x38 0xf6"
],
@ -924,14 +916,12 @@
"cset x21, ls",
"cmp x20, #0x1 (1)",
"csel x20, x21, x23, eq",
"mov w0, w22",
"bfi w0, w20, #28, #1",
"mov w20, w0",
"msr nzcv, x20"
"bfi w22, w20, #28, #1",
"msr nzcv, x22"
]
},
"adox rax, rbx": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 12,
"Comment": [
"0xf3 REX.W 0x0f 0x38 0xf6"
],
@ -946,10 +936,8 @@
"cset x23, ls",
"cmp x20, #0x1 (1)",
"csel x20, x23, x22, eq",
"mov w0, w21",
"bfi w0, w20, #28, #1",
"mov w20, w0",
"msr nzcv, x20"
"bfi w21, w20, #28, #1",
"msr nzcv, x21"
]
}
}

View File

@ -2652,7 +2652,7 @@
]
},
"sahf": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 14,
"Comment": "0x9e",
"ExpectedArm64ASM": [
"ubfx w20, w4, #8, #8",
@ -2661,16 +2661,14 @@
"orr x27, x20, #0x2",
"ubfx x20, x27, #0, #1",
"mrs x21, nzcv",
"mov w0, w21",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"ubfx w21, w27, #2, #1",
"eor w26, w21, #0x1",
"ubfx x21, x27, #6, #1",
"bfi w20, w21, #30, #1",
"ubfx x21, x27, #7, #1",
"bfi w20, w21, #31, #1",
"msr nzcv, x20"
"bfi w21, w20, #29, #1",
"ubfx w20, w27, #2, #1",
"eor w26, w20, #0x1",
"ubfx x20, x27, #6, #1",
"bfi w21, w20, #30, #1",
"ubfx x20, x27, #7, #1",
"bfi w21, w20, #31, #1",
"msr nzcv, x21"
]
},
"lahf": {

File diff suppressed because it is too large Load Diff

View File

@ -199,18 +199,16 @@
]
},
"inc eax": {
"ExpectedInstructionCount": 9,
"ExpectedInstructionCount": 7,
"Comment": "0x40",
"ExpectedArm64ASM": [
"cset w20, hs",
"mov x27, x4",
"adds w26, w4, #0x1 (1)",
"mrs x21, nzcv",
"mov w0, w21",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"bfi w21, w20, #29, #1",
"mov x4, x26",
"msr nzcv, x20"
"msr nzcv, x21"
]
},
"dec ax": {
@ -244,18 +242,16 @@
]
},
"dec eax": {
"ExpectedInstructionCount": 9,
"ExpectedInstructionCount": 7,
"Comment": "0x48",
"ExpectedArm64ASM": [
"cset w20, hs",
"mov x27, x4",
"subs w26, w4, #0x1 (1)",
"mrs x21, nzcv",
"mov w0, w21",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"bfi w21, w20, #29, #1",
"mov x4, x26",
"msr nzcv, x20"
"msr nzcv, x21"
]
},
"pusha": {

View File

@ -5305,7 +5305,7 @@
]
},
"vpsadbw ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 37,
"ExpectedInstructionCount": 36,
"Comment": [
"Map 1 0b01 0xf6 256-bit"
],
@ -5330,18 +5330,17 @@
"mov z2.d, p0/m, z1.d",
"msr nzcv, x0",
"mov z1.q, q2",
"mov z2.d, z4.d",
"not p0.b, p7/z, p6.b",
"mov z2.b, p0/m, z1.b",
"mov z1.d, z2.d[1]",
"mov z3.d, z2.d",
"mov z4.b, p0/m, z1.b",
"mov z1.d, z4.d[1]",
"mov z2.d, z4.d",
"mrs x0, nzcv",
"index z0.d, #-2, #1",
"cmpeq p0.d, p7/z, z0.d, #0",
"mov z3.d, p0/m, z1.d",
"mov z2.d, p0/m, z1.d",
"msr nzcv, x0",
"mov z1.d, z2.d[2]",
"mov z16.d, z3.d",
"mov z1.d, z4.d[2]",
"mov z16.d, z2.d",
"mrs x0, nzcv",
"index z0.d, #-2, #1",
"cmpeq p0.d, p7/z, z0.d, #-1",