Merge pull request #3421 from pmatos/AddressingModes32

Improve 32bit ld/st addressing mode propagation
This commit is contained in:
Ryan Houdek 2024-03-11 15:20:20 -07:00 committed by GitHub
commit ff0c7637c9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 345 additions and 392 deletions

View File

@ -80,7 +80,8 @@ void PassManager::AddDefaultPasses(FEXCore::Context::ContextImpl *ctx, bool Inli
InsertPass(CreateDeadStoreElimination(ctx->HostFeatures.SupportsAVX));
InsertPass(CreatePassDeadCodeElimination());
InsertPass(CreateConstProp(InlineConstants, ctx->HostFeatures.SupportsTSOImm9));
InsertPass(CreateConstProp(
InlineConstants, ctx->HostFeatures.SupportsTSOImm9, Is64BitMode()));
InsertPass(CreateDeadFlagCalculationEliminination());
@ -121,5 +122,4 @@ bool PassManager::Run(IREmitter *IREmit) {
return Changed;
}
}

View File

@ -16,15 +16,17 @@ class Pass;
class RegisterAllocationPass;
class RegisterAllocationData;
fextl::unique_ptr<FEXCore::IR::Pass> CreateConstProp(bool InlineConstants, bool SupportsTSOImm9);
fextl::unique_ptr<FEXCore::IR::Pass>
CreateConstProp(bool InlineConstants, bool SupportsTSOImm9, bool Is64BitMode);
fextl::unique_ptr<FEXCore::IR::Pass> CreateContextLoadStoreElimination(bool SupportsAVX);
fextl::unique_ptr<FEXCore::IR::Pass> CreateInlineCallOptimization(const FEXCore::CPUIDEmu* CPUID);
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadFlagCalculationEliminination();
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadStoreElimination(bool SupportsAVX);
fextl::unique_ptr<FEXCore::IR::Pass> CreatePassDeadCodeElimination();
fextl::unique_ptr<FEXCore::IR::Pass> CreateIRCompaction(FEXCore::Utils::IntrusivePooledAllocator &Allocator);
fextl::unique_ptr<FEXCore::IR::RegisterAllocationPass> CreateRegisterAllocationPass(FEXCore::IR::Pass* CompactionPass,
bool SupportsAVX);
fextl::unique_ptr<FEXCore::IR::RegisterAllocationPass>
CreateRegisterAllocationPass(FEXCore::IR::Pass *CompactionPass,
bool SupportsAVX);
fextl::unique_ptr<FEXCore::IR::Pass> CreateLongDivideEliminationPass();
namespace Validation {

View File

@ -27,6 +27,7 @@ $end_info$
#include <bit>
#include <cstdint>
#include <memory>
#include <optional>
#include <string.h>
#include <tuple>
#include <utility>
@ -90,58 +91,111 @@ static bool IsTSOImm9(uint64_t imm) {
}
}
static std::tuple<MemOffsetType, uint8_t, OrderedNode*, OrderedNode*> MemExtendedAddressing(IREmitter *IREmit, uint8_t AccessSize, IROp_Header* AddressHeader) {
using MemExtendedAddrResult =
std::tuple<MemOffsetType, uint8_t, OrderedNode *, OrderedNode *>;
// Tries to fold the OP_ADD that computes a memory address into the memory
// op's own addressing mode.
//
// AddressHeader must be an OP_ADD (asserted below). AccessSize is the size of
// the memory access and is only used to validate a candidate scale.
//
// On success the result is (offset type, offset scale, base node, offset
// node); callers store the first two on the memory op and substitute the two
// nodes for its address and offset arguments. In the MUL/LSHL/BFE/SBFE
// patterns the base is the add's second argument and the offset is the source
// of its first argument; in the fallback at the bottom the base is the add's
// first argument and the offset is its second.
//
// Returns std::nullopt when the add cannot be folded; the caller must then
// leave the memory op untouched.
static std::optional<MemExtendedAddrResult>
MemExtendedAddressing(IREmitter *IREmit, uint8_t AccessSize,
IROp_Header *AddressHeader) {
LOGMAN_THROW_A_FMT(AddressHeader->Op == OP_ADD, "Invalid address Op");
auto Src0Header = IREmit->GetOpHeader(AddressHeader->Args[0]);
// The scaled/extended patterns are only attempted when the first add operand
// is a 64-bit value.
if (Src0Header->Size == 8) {
//Try to optimize: Base + MUL(Offset, Scale)
// Try to optimize: Base + MUL(Offset, Scale)
if (Src0Header->Op == OP_MUL) {
uint64_t Scale;
if (IREmit->IsValueConstant(Src0Header->Args[1], &Scale)) {
if (IsMemoryScale(Scale, AccessSize)) {
// remove mul as it can be folded to the mem op
return { MEM_OFFSET_SXTX, (uint8_t)Scale, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0]) };
return std::make_optional(
std::make_tuple(MEM_OFFSET_SXTX, (uint8_t)Scale,
IREmit->UnwrapNode(AddressHeader->Args[1]),
IREmit->UnwrapNode(Src0Header->Args[0])));
} else if (Scale == 1) {
// remove nop mul
return { MEM_OFFSET_SXTX, 1, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0]) };
return std::make_optional(std::make_tuple(
MEM_OFFSET_SXTX, 1, IREmit->UnwrapNode(AddressHeader->Args[1]),
IREmit->UnwrapNode(Src0Header->Args[0])));
}
}
}
//Try to optimize: Base + LSHL(Offset, Scale)
// Try to optimize: Base + LSHL(Offset, Scale)
else if (Src0Header->Op == OP_LSHL) {
uint64_t Constant2;
if (IREmit->IsValueConstant(Src0Header->Args[1], &Constant2)) {
// NOTE(review): the literal 1 is an int, so this shift is evaluated in int
// before being widened to uint64_t; a shift count of 31 or more would
// overflow. Presumably shift constants are small here -- confirm.
uint64_t Scale = 1<<Constant2;
if (IsMemoryScale(Scale, AccessSize)) {
// remove shift as it can be folded to the mem op
return { MEM_OFFSET_SXTX, Scale, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0]) };
return std::make_optional(
std::make_tuple(MEM_OFFSET_SXTX, Scale,
IREmit->UnwrapNode(AddressHeader->Args[1]),
IREmit->UnwrapNode(Src0Header->Args[0])));
} else if (Scale == 1) {
// remove nop shift
return { MEM_OFFSET_SXTX, 1, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0]) };
return std::make_optional(std::make_tuple(
MEM_OFFSET_SXTX, 1, IREmit->UnwrapNode(AddressHeader->Args[1]),
IREmit->UnwrapNode(Src0Header->Args[0])));
}
}
}
#if defined(_M_ARM_64) // x86 can't sext or zext on mem ops
//Try to optimize: Base + (u32)Offset
// Try to optimize: Base + (u32)Offset
else if (Src0Header->Op == OP_BFE) {
auto Bfe = Src0Header->C<IROp_Bfe>();
if (Bfe->lsb == 0 && Bfe->Width == 32) {
//todo: arm can also scale here
return { MEM_OFFSET_UXTW, 1, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0]) };
return std::make_optional(std::make_tuple(
MEM_OFFSET_UXTW, 1, IREmit->UnwrapNode(AddressHeader->Args[1]),
IREmit->UnwrapNode(Src0Header->Args[0])));
}
}
//Try to optimize: Base + (s32)Offset
// Try to optimize: Base + (s32)Offset
else if (Src0Header->Op == OP_SBFE) {
auto Sbfe = Src0Header->C<IROp_Sbfe>();
if (Sbfe->lsb == 0 && Sbfe->Width == 32) {
//todo: arm can also scale here
return { MEM_OFFSET_SXTW, 1, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0]) };
// todo: arm can also scale here
return std::make_optional(std::make_tuple(
MEM_OFFSET_SXTW, 1, IREmit->UnwrapNode(AddressHeader->Args[1]),
IREmit->UnwrapNode(Src0Header->Args[0])));
}
}
#endif
}
// no match anywhere, just add
return { MEM_OFFSET_SXTX, 1, IREmit->UnwrapNode(AddressHeader->Args[0]), IREmit->UnwrapNode(AddressHeader->Args[1]) };
// However, if we have one 32bit negative constant, we need to sign extend it
auto Arg0_ = AddressHeader->Args[0];
auto Arg1_ = AddressHeader->Args[1];
auto Arg1H = IREmit->GetOpHeader(Arg1_);
auto Arg0 = IREmit->UnwrapNode(Arg0_);
auto Arg1 = IREmit->UnwrapNode(Arg1_);
uint64_t ConstVal = 0;
// Only fold a 32-bit reg+const add when the constant, read as a signed
// 32-bit value, lies strictly between -16384 and 16384.
if (Arg1H->Size == 4 && IREmit->IsValueConstant(Arg1_, &ConstVal)) {
// Base is Arg0, Constant (Displacement in Arg1)
OrderedNode *Base = Arg0;
OrderedNode *Cnt = Arg1;
int32_t Val32 = (int32_t)ConstVal;
if (Val32 > -16384 && Val32 < 0) {
// Negative displacement: request a sign-extended 32-bit offset (SXTW).
return std::make_optional(std::make_tuple(MEM_OFFSET_SXTW, 1, Base, Cnt));
} else if (Val32 >= 0 && Val32 < 16384) {
// Non-negative displacement: no 32-bit sign extension is requested.
return std::make_optional(std::make_tuple(MEM_OFFSET_SXTX, 1, Base, Cnt));
}
} else if (AddressHeader->Size == 4) {
// Do not optimize 32bit reg+reg.
// Something like :
// add w20, w7, w5
// ldr w7, [x20]
//
// cannot be simplified to (or any other single load instruction)
// ldr w7, [x5, w7, sxtx]
return std::nullopt;
} else {
// Any other add: fold it as base + offset with scale 1.
return std::make_optional(std::make_tuple(MEM_OFFSET_SXTX, 1, Arg0, Arg1));
}
// Only reached for a 32-bit constant outside the accepted range: do not fold.
return std::nullopt;
}
static OrderedNodeWrapper RemoveUselessMasking(IREmitter *IREmit, OrderedNodeWrapper src, uint64_t mask) {
@ -184,9 +238,10 @@ static bool IsBfeAlreadyDone(IREmitter *IREmit, OrderedNodeWrapper src, uint64_t
class ConstProp final : public FEXCore::IR::Pass {
public:
explicit ConstProp(bool DoInlineConstants, bool SupportsTSOImm9)
: InlineConstants(DoInlineConstants)
, SupportsTSOImm9 {SupportsTSOImm9} { }
explicit ConstProp(bool DoInlineConstants, bool SupportsTSOImm9,
bool Is64BitMode)
: InlineConstants(DoInlineConstants), SupportsTSOImm9{SupportsTSOImm9},
Is64BitMode(Is64BitMode) {}
bool Run(IREmitter *IREmit) override;
@ -219,6 +274,7 @@ private:
return Result.first->second;
}
bool SupportsTSOImm9{};
bool Is64BitMode;
// This is a heuristic to limit constant pool live ranges to reduce RA interference pressure.
// If the range is unbounded then RA interference pressure seems to increase to the point
// that long blocks of constant usage can slow to a crawl.
@ -490,8 +546,12 @@ bool ConstProp::ConstantPropagation(IREmitter *IREmit, const IRListView& Current
if (Op->Class == FEXCore::IR::FPRClass && AddressHeader->Op == OP_ADD && AddressHeader->Size == 8) {
// TODO: LRCPC3 supports a vector unscaled offset like LRCPC2.
// Support once hardware is available to use this.
auto [OffsetType, OffsetScale, Arg0, Arg1] = MemExtendedAddressing(IREmit, IROp->Size, AddressHeader);
auto MaybeMemAddr =
MemExtendedAddressing(IREmit, IROp->Size, AddressHeader);
if (!MaybeMemAddr) {
break;
}
auto [OffsetType, OffsetScale, Arg0, Arg1] = *MaybeMemAddr;
Op->OffsetType = OffsetType;
Op->OffsetScale = OffsetScale;
IREmit->ReplaceNodeArgument(CodeNode, Op->Addr_Index, Arg0); // Addr
@ -509,8 +569,12 @@ bool ConstProp::ConstantPropagation(IREmitter *IREmit, const IRListView& Current
if (Op->Class == FEXCore::IR::FPRClass && AddressHeader->Op == OP_ADD && AddressHeader->Size == 8) {
// TODO: LRCPC3 supports a vector unscaled offset like LRCPC2.
// Support once hardware is available to use this.
auto [OffsetType, OffsetScale, Arg0, Arg1] = MemExtendedAddressing(IREmit, IROp->Size, AddressHeader);
auto MaybeMemAddr =
MemExtendedAddressing(IREmit, IROp->Size, AddressHeader);
if (!MaybeMemAddr) {
break;
}
auto [OffsetType, OffsetScale, Arg0, Arg1] = *MaybeMemAddr;
Op->OffsetType = OffsetType;
Op->OffsetScale = OffsetScale;
IREmit->ReplaceNodeArgument(CodeNode, Op->Addr_Index, Arg0); // Addr
@ -525,12 +589,19 @@ bool ConstProp::ConstantPropagation(IREmitter *IREmit, const IRListView& Current
auto Op = IROp->CW<IR::IROp_LoadMem>();
auto AddressHeader = IREmit->GetOpHeader(Op->Addr);
if (AddressHeader->Op == OP_ADD && AddressHeader->Size == 8) {
auto [OffsetType, OffsetScale, Arg0, Arg1] = MemExtendedAddressing(IREmit, IROp->Size, AddressHeader);
if (AddressHeader->Op == OP_ADD &&
((Is64BitMode && AddressHeader->Size == 8) ||
(!Is64BitMode && AddressHeader->Size == 4))) {
auto MaybeMemAddr =
MemExtendedAddressing(IREmit, IROp->Size, AddressHeader);
if (!MaybeMemAddr) {
break;
}
auto [OffsetType, OffsetScale, Arg0, Arg1] = *MaybeMemAddr;
Op->OffsetType = OffsetType;
Op->OffsetScale = OffsetScale;
IREmit->ReplaceNodeArgument(CodeNode, Op->Addr_Index, Arg0); // Addr
IREmit->ReplaceNodeArgument(CodeNode, Op->Addr_Index, Arg0); // Addr
IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, Arg1); // Offset
Changed = true;
@ -542,8 +613,15 @@ bool ConstProp::ConstantPropagation(IREmitter *IREmit, const IRListView& Current
auto Op = IROp->CW<IR::IROp_StoreMem>();
auto AddressHeader = IREmit->GetOpHeader(Op->Addr);
if (AddressHeader->Op == OP_ADD && AddressHeader->Size == 8) {
auto [OffsetType, OffsetScale, Arg0, Arg1] = MemExtendedAddressing(IREmit, IROp->Size, AddressHeader);
if (AddressHeader->Op == OP_ADD &&
((Is64BitMode && AddressHeader->Size == 8) ||
(!Is64BitMode && AddressHeader->Size == 4))) {
auto MaybeMemAddr =
MemExtendedAddressing(IREmit, IROp->Size, AddressHeader);
if (!MaybeMemAddr) {
break;
}
auto [OffsetType, OffsetScale, Arg0, Arg1] = *MaybeMemAddr;
Op->OffsetType = OffsetType;
Op->OffsetScale = OffsetScale;
@ -1311,8 +1389,9 @@ bool ConstProp::Run(IREmitter *IREmit) {
return Changed;
}
fextl::unique_ptr<FEXCore::IR::Pass> CreateConstProp(bool InlineConstants, bool SupportsTSOImm9) {
return fextl::make_unique<ConstProp>(InlineConstants, SupportsTSOImm9);
// Factory for the ConstProp pass: forwards the three flags unchanged to the
// ConstProp constructor and returns the new pass as a generic IR::Pass.
fextl::unique_ptr<FEXCore::IR::Pass>
CreateConstProp(bool InlineConstants, bool SupportsTSOImm9, bool Is64BitMode) {
return fextl::make_unique<ConstProp>(InlineConstants, SupportsTSOImm9,
Is64BitMode);
}
}

View File

@ -0,0 +1,20 @@
%ifdef CONFIG
{
"RegData": {
"RAX": "0xdeadbeef"
},
"MemoryRegions": {
"0x10000000": "4096"
},
"MemoryData": {
"0x10000000": "0xdeadbeef"
},
"Mode": "32BIT"
}
%endif
; 32-bit mode load through a register base with a negative displacement.
; eax is set to 0x10000040, so [eax-0x40] addresses 0x10000000, the dword
; that MemoryData seeds with 0xdeadbeef; RegData expects that value in RAX.
section .text
; Point eax 0x40 bytes past the start of the mapped region.
lea eax, [0x10000040]
; Load back through base + negative displacement.
mov eax, [eax-0x40]
hlt

View File

@ -14,162 +14,141 @@
],
"Instructions": {
"movzx eax, byte [ecx - 257]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0xfffffeff",
"add w20, w5, w20",
"ldrb w4, [x20]"
"ldrb w4, [x5, w20, sxtw]"
]
},
"movzx eax, byte [ecx - 256]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0xffffff00",
"add w20, w5, w20",
"ldrb w4, [x20]"
"ldrb w4, [x5, w20, sxtw]"
]
},
"movzx eax, byte [ecx + 255]": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"add w20, w5, #0xff (255)",
"ldrb w4, [x20]"
"ldrb w4, [x5, #255]"
]
},
"movzx eax, byte [ecx + 256]": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"add w20, w5, #0x100 (256)",
"ldrb w4, [x20]"
"ldrb w4, [x5, #256]"
]
},
"movzx eax, byte [ecx + 4095]": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"add w20, w5, #0xfff (4095)",
"ldrb w4, [x20]"
"ldrb w4, [x5, #4095]"
]
},
"movzx eax, byte [ecx + 4096]": {
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"add w20, w5, #0x1000 (4096)",
"ldrb w4, [x20]"
"mov w20, #0x1000",
"ldrb w4, [x5, x20, sxtx]"
]
},
"movzx eax, word [ecx - 257]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0xfffffeff",
"add w20, w5, w20",
"ldrh w4, [x20]"
"ldrh w4, [x5, w20, sxtw]"
]
},
"movzx eax, word [ecx - 256]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0xffffff00",
"add w20, w5, w20",
"ldrh w4, [x20]"
"ldrh w4, [x5, w20, sxtw]"
]
},
"movzx eax, word [ecx + 255]": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"add w20, w5, #0xff (255)",
"ldrh w4, [x20]"
"ldurh w4, [x5, #255]"
]
},
"movzx eax, word [ecx + 256]": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"add w20, w5, #0x100 (256)",
"ldrh w4, [x20]"
"ldrh w4, [x5, #256]"
]
},
"movzx eax, word [ecx + 8190]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"mov w20, #0x1ffe",
"add w20, w5, w20",
"ldrh w4, [x20]"
"ldrh w4, [x5, #8190]"
]
},
"movzx eax, word [ecx + 8191]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0x1fff",
"add w20, w5, w20",
"ldrh w4, [x20]"
"ldrh w4, [x5, x20, sxtx]"
]
},
"movzx eax, word [ecx + 8192]": {
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"add w20, w5, #0x2000 (8192)",
"ldrh w4, [x20]"
"mov w20, #0x2000",
"ldrh w4, [x5, x20, sxtx]"
]
},
"mov eax, dword [ecx - 257]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0xfffffeff",
"add w20, w5, w20",
"ldr w4, [x20]"
"ldr w4, [x5, w20, sxtw]"
]
},
"mov eax, dword [ecx - 256]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0xffffff00",
"add w20, w5, w20",
"ldr w4, [x20]"
"ldr w4, [x5, w20, sxtw]"
]
},
"mov eax, dword [ecx + 255]": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"add w20, w5, #0xff (255)",
"ldr w4, [x20]"
"ldur w4, [x5, #255]"
]
},
"mov eax, dword [ecx + 256]": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"add w20, w5, #0x100 (256)",
"ldr w4, [x20]"
"ldr w4, [x5, #256]"
]
},
"mov eax, dword [ecx + 16380]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"mov w20, #0x3ffc",
"add w20, w5, w20",
"ldr w4, [x20]"
"ldr w4, [x5, #16380]"
]
},
"mov eax, dword [ecx + 16381]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0x3ffd",
"add w20, w5, w20",
"ldr w4, [x20]"
"ldr w4, [x5, x20, sxtx]"
]
},
"mov eax, dword [ecx + 16382]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0x3ffe",
"add w20, w5, w20",
"ldr w4, [x20]"
"ldr w4, [x5, x20, sxtx]"
]
},
"mov eax, dword [ecx + 16383]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0x3fff",
"add w20, w5, w20",
"ldr w4, [x20]"
"ldr w4, [x5, x20, sxtx]"
]
},
"mov eax, dword [ecx + 16384]": {
@ -180,57 +159,49 @@
]
},
"movss xmm0, [ecx + 16379]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0x3ffb",
"add w20, w5, w20",
"ldr s16, [x20]"
"ldr s16, [x5, x20, sxtx]"
]
},
"movss xmm0, [ecx + 16380]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"mov w20, #0x3ffc",
"add w20, w5, w20",
"ldr s16, [x20]"
"ldr s16, [x5, #16380]"
]
},
"movss xmm0, [ecx + 16381]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0x3ffd",
"add w20, w5, w20",
"ldr s16, [x20]"
"ldr s16, [x5, x20, sxtx]"
]
},
"movss xmm0, [ecx - 257]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0xfffffeff",
"add w20, w5, w20",
"ldr s16, [x20]"
"ldr s16, [x5, w20, sxtw]"
]
},
"movss xmm0, [ecx - 256]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0xffffff00",
"add w20, w5, w20",
"ldr s16, [x20]"
"ldr s16, [x5, w20, sxtw]"
]
},
"movss xmm0, [ecx + 255]": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"add w20, w5, #0xff (255)",
"ldr s16, [x20]"
"ldur s16, [x5, #255]"
]
},
"movss xmm0, [ecx + 256]": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"add w20, w5, #0x100 (256)",
"ldr s16, [x20]"
"ldr s16, [x5, #256]"
]
},
"movsd xmm0, [ecx + 32759]": {
@ -258,33 +229,29 @@
]
},
"movsd xmm0, [ecx - 257]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0xfffffeff",
"add w20, w5, w20",
"ldr d16, [x20]"
"ldr d16, [x5, w20, sxtw]"
]
},
"movsd xmm0, [ecx - 256]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0xffffff00",
"add w20, w5, w20",
"ldr d16, [x20]"
"ldr d16, [x5, w20, sxtw]"
]
},
"movsd xmm0, [ecx + 255]": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"add w20, w5, #0xff (255)",
"ldr d16, [x20]"
"ldur d16, [x5, #255]"
]
},
"movsd xmm0, [ecx + 256]": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"add w20, w5, #0x100 (256)",
"ldr d16, [x20]"
"ldr d16, [x5, #256]"
]
},
"movq xmm0, [ecx + 65519]": {
@ -312,33 +279,29 @@
]
},
"movq xmm0, [ecx - 257]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0xfffffeff",
"add w20, w5, w20",
"ldr d16, [x20]"
"ldr d16, [x5, w20, sxtw]"
]
},
"movq xmm0, [ecx - 256]": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"ExpectedArm64ASM": [
"mov w20, #0xffffff00",
"add w20, w5, w20",
"ldr d16, [x20]"
"ldr d16, [x5, w20, sxtw]"
]
},
"movq xmm0, [ecx + 255]": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"add w20, w5, #0xff (255)",
"ldr d16, [x20]"
"ldur d16, [x5, #255]"
]
},
"movq xmm0, [ecx + 256]": {
"ExpectedInstructionCount": 2,
"ExpectedInstructionCount": 1,
"ExpectedArm64ASM": [
"add w20, w5, #0x100 (256)",
"ldr d16, [x20]"
"ldr d16, [x5, #256]"
]
}
}

View File

@ -12,7 +12,7 @@
},
"Instructions": {
"Sonic Mania movie player": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 16,
"Comment": "Used to be hottest block in Sonic Mania",
"x86Insts": [
"movzx edx, byte [esi+ecx]",
@ -35,8 +35,7 @@
"lsl w6, w6, #8",
"add w10, w10, #0x1 (1)",
"orr w6, w6, w5",
"add w20, w9, #0xc (12)",
"ldr w5, [x20]",
"ldr w5, [x9, #12]",
"ldr w20, [x4]",
"orr w20, w20, w6",
"str w20, [x4]",
@ -47,7 +46,7 @@
]
},
"wine mscrt.dll memmove": {
"ExpectedInstructionCount": 20,
"ExpectedInstructionCount": 14,
"Comment": "Hot in Sonic Mania",
"x86Insts": [
"movdqu xmm0, [esi]",
@ -65,19 +64,13 @@
],
"ExpectedArm64ASM": [
"ldr q16, [x10]",
"add w20, w10, #0x10 (16)",
"ldr q17, [x20]",
"add w20, w10, #0x20 (32)",
"ldr q18, [x20]",
"add w20, w10, #0x30 (48)",
"ldr q19, [x20]",
"ldr q17, [x10, #16]",
"ldr q18, [x10, #32]",
"ldr q19, [x10, #48]",
"str q16, [x11]",
"add w20, w11, #0x10 (16)",
"str q17, [x20]",
"add w20, w11, #0x20 (32)",
"str q18, [x20]",
"add w20, w11, #0x30 (48)",
"str q19, [x20]",
"str q17, [x11, #16]",
"str q18, [x11, #32]",
"str q19, [x11, #48]",
"add w10, w10, #0x40 (64)",
"add w11, w11, #0x40 (64)",
"sub w5, w5, #0x40 (64)",
@ -87,7 +80,7 @@
]
},
"dxvk hotblock from MGRR": {
"ExpectedInstructionCount": 51,
"ExpectedInstructionCount": 43,
"Comment": [
"Hottest block in Metal Gear Rising: Revengeance render thread"
],
@ -108,18 +101,14 @@
"lock cmpxchg8b qword [esi+0x8]"
],
"ExpectedArm64ASM": [
"add w20, w4, #0xc (12)",
"ldr w6, [x20]",
"add w20, w4, #0x8 (8)",
"ldr w4, [x20]",
"ldr w6, [x4, #12]",
"ldr w4, [x4, #8]",
"mov w20, #0xffffffcc",
"add w20, w9, w20",
"str w10, [x20]",
"str w10, [x9, w20, sxtw]",
"mov w5, w4",
"mov w7, w6",
"mov w20, #0xffffffdc",
"add w20, w9, w20",
"ldr w10, [x20]",
"ldr w10, [x9, w20, sxtw]",
"mov w20, #0xffffffff",
"adds w21, w4, w20",
"mov w5, w21",
@ -127,17 +116,13 @@
"adcs w26, w6, w20",
"mov w7, w26",
"mov w20, #0xffffffd8",
"add w20, w9, w20",
"str w21, [x20]",
"str w21, [x9, w20, sxtw]",
"mov w20, #0xffffffd4",
"add w22, w9, w20",
"str w26, [x22]",
"str w26, [x9, w20, sxtw]",
"mov w7, w21",
"mov w22, #0xffffffd0",
"add w22, w9, w22",
"str w21, [x22]",
"add w20, w9, w20",
"ldr w5, [x20]",
"str w21, [x9, w22, sxtw]",
"ldr w5, [x9, w20, sxtw]",
"add w20, w10, #0x8 (8)",
"mov w22, w4",
"mov w23, w6",
@ -162,7 +147,7 @@
]
},
"Psychonauts matrix swizzle": {
"ExpectedInstructionCount": 2522,
"ExpectedInstructionCount": 2426,
"Comment": [
"Hottest block in Windows Psychonauts",
"Doing a 4x4 32-bit float matrix swizzle",
@ -283,10 +268,8 @@
"cfinv",
"mov w8, w26",
"mov w21, #0xffffffbc",
"add w22, w20, w21",
"str w5, [x22]",
"add w20, w20, w21",
"ldr w4, [x20]",
"str w5, [x20, w21, sxtw]",
"ldr w4, [x20, w21, sxtw]",
"ldrb w20, [x28, #747]",
"ldr s2, [x4]",
"mrs x0, nzcv",
@ -356,8 +339,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w23, #0xffffffc0",
"add w23, w9, w23",
"str s2, [x23]",
"str s2, [x9, w23, sxtw]",
"ldrb w23, [x28, #1026]",
"lsl w24, w22, w20",
"bic w23, w23, w24",
@ -365,10 +347,8 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w24, w9, w21",
"ldr w5, [x24]",
"add w24, w5, #0x10 (16)",
"ldr s2, [x24]",
"ldr w5, [x9, w21, sxtw]",
"ldr s2, [x5, #16]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -434,8 +414,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w23, #0xffffffc4",
"add w23, w9, w23",
"str s2, [x23]",
"str s2, [x9, w23, sxtw]",
"ldrb w23, [x28, #1026]",
"lsl w24, w22, w20",
"bic w23, w23, w24",
@ -443,10 +422,8 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w24, w9, w21",
"ldr w6, [x24]",
"add w24, w6, #0x20 (32)",
"ldr s2, [x24]",
"ldr w6, [x9, w21, sxtw]",
"ldr s2, [x6, #32]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -512,8 +489,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w23, #0xffffffc8",
"add w23, w9, w23",
"str s2, [x23]",
"str s2, [x9, w23, sxtw]",
"ldrb w23, [x28, #1026]",
"lsl w24, w22, w20",
"bic w23, w23, w24",
@ -521,10 +497,8 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w24, w9, w21",
"ldr w4, [x24]",
"add w24, w4, #0x30 (48)",
"ldr s2, [x24]",
"ldr w4, [x9, w21, sxtw]",
"ldr s2, [x4, #48]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -590,8 +564,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w23, #0xffffffcc",
"add w23, w9, w23",
"str s2, [x23]",
"str s2, [x9, w23, sxtw]",
"ldrb w23, [x28, #1026]",
"lsl w24, w22, w20",
"bic w23, w23, w24",
@ -599,10 +572,8 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w21, w9, w21",
"ldr w5, [x21]",
"add w21, w5, #0x4 (4)",
"ldr s2, [x21]",
"ldr w5, [x9, w21, sxtw]",
"ldr s2, [x5, #4]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -668,8 +639,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w21, #0xffffffd0",
"add w21, w9, w21",
"str s2, [x21]",
"str s2, [x9, w21, sxtw]",
"ldrb w21, [x28, #1026]",
"lsl w22, w22, w20",
"bic w21, w21, w22",
@ -678,10 +648,8 @@
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"mov w22, #0xffffffbc",
"add w23, w9, w22",
"ldr w6, [x23]",
"add w23, w6, #0x14 (20)",
"ldr s2, [x23]",
"ldr w6, [x9, w22, sxtw]",
"ldr s2, [x6, #20]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -748,8 +716,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w21, #0xffffffd4",
"add w21, w9, w21",
"str s2, [x21]",
"str s2, [x9, w21, sxtw]",
"ldrb w21, [x28, #1026]",
"lsl w24, w23, w20",
"bic w21, w21, w24",
@ -757,10 +724,8 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w24, w9, w22",
"ldr w4, [x24]",
"add w24, w4, #0x24 (36)",
"ldr s2, [x24]",
"ldr w4, [x9, w22, sxtw]",
"ldr s2, [x4, #36]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -826,8 +791,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w21, #0xffffffd8",
"add w21, w9, w21",
"str s2, [x21]",
"str s2, [x9, w21, sxtw]",
"ldrb w21, [x28, #1026]",
"lsl w24, w23, w20",
"bic w21, w21, w24",
@ -835,10 +799,8 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w24, w9, w22",
"ldr w5, [x24]",
"add w24, w5, #0x34 (52)",
"ldr s2, [x24]",
"ldr w5, [x9, w22, sxtw]",
"ldr s2, [x5, #52]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -904,8 +866,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w21, #0xffffffdc",
"add w21, w9, w21",
"str s2, [x21]",
"str s2, [x9, w21, sxtw]",
"ldrb w21, [x28, #1026]",
"lsl w24, w23, w20",
"bic w21, w21, w24",
@ -913,10 +874,8 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w24, w9, w22",
"ldr w6, [x24]",
"add w24, w6, #0x8 (8)",
"ldr s2, [x24]",
"ldr w6, [x9, w22, sxtw]",
"ldr s2, [x6, #8]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -982,8 +941,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w21, #0xffffffe0",
"add w21, w9, w21",
"str s2, [x21]",
"str s2, [x9, w21, sxtw]",
"ldrb w21, [x28, #1026]",
"lsl w24, w23, w20",
"bic w21, w21, w24",
@ -991,10 +949,8 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w22, w9, w22",
"ldr w4, [x22]",
"add w22, w4, #0x18 (24)",
"ldr s2, [x22]",
"ldr w4, [x9, w22, sxtw]",
"ldr s2, [x4, #24]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -1060,8 +1016,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w21, #0xffffffe4",
"add w21, w9, w21",
"str s2, [x21]",
"str s2, [x9, w21, sxtw]",
"ldrb w21, [x28, #1026]",
"lsl w22, w23, w20",
"bic w21, w21, w22",
@ -1070,10 +1025,8 @@
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"mov w22, #0xffffffbc",
"add w23, w9, w22",
"ldr w5, [x23]",
"add w23, w5, #0x28 (40)",
"ldr s2, [x23]",
"ldr w5, [x9, w22, sxtw]",
"ldr s2, [x5, #40]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -1140,8 +1093,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w21, #0xffffffe8",
"add w21, w9, w21",
"str s2, [x21]",
"str s2, [x9, w21, sxtw]",
"ldrb w21, [x28, #1026]",
"lsl w24, w23, w20",
"bic w21, w21, w24",
@ -1149,10 +1101,8 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w24, w9, w22",
"ldr w6, [x24]",
"add w24, w6, #0x38 (56)",
"ldr s2, [x24]",
"ldr w6, [x9, w22, sxtw]",
"ldr s2, [x6, #56]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -1218,8 +1168,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w21, #0xffffffec",
"add w21, w9, w21",
"str s2, [x21]",
"str s2, [x9, w21, sxtw]",
"ldrb w21, [x28, #1026]",
"lsl w24, w23, w20",
"bic w21, w21, w24",
@ -1227,10 +1176,8 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w24, w9, w22",
"ldr w4, [x24]",
"add w24, w4, #0xc (12)",
"ldr s2, [x24]",
"ldr w4, [x9, w22, sxtw]",
"ldr s2, [x4, #12]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -1296,8 +1243,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w21, #0xfffffff0",
"add w21, w9, w21",
"str s2, [x21]",
"str s2, [x9, w21, sxtw]",
"ldrb w21, [x28, #1026]",
"lsl w24, w23, w20",
"bic w21, w21, w24",
@ -1305,10 +1251,8 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w24, w9, w22",
"ldr w5, [x24]",
"add w24, w5, #0x1c (28)",
"ldr s2, [x24]",
"ldr w5, [x9, w22, sxtw]",
"ldr s2, [x5, #28]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -1374,8 +1318,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w21, #0xfffffff4",
"add w21, w9, w21",
"str s2, [x21]",
"str s2, [x9, w21, sxtw]",
"ldrb w21, [x28, #1026]",
"lsl w24, w23, w20",
"bic w21, w21, w24",
@ -1383,10 +1326,8 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w22, w9, w22",
"ldr w6, [x22]",
"add w22, w6, #0x2c (44)",
"ldr s2, [x22]",
"ldr w6, [x9, w22, sxtw]",
"ldr s2, [x6, #44]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -1452,8 +1393,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w21, #0xfffffff8",
"add w21, w9, w21",
"str s2, [x21]",
"str s2, [x9, w21, sxtw]",
"ldrb w21, [x28, #1026]",
"lsl w22, w23, w20",
"bic w21, w21, w22",
@ -1462,10 +1402,8 @@
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"mov w22, #0xffffffbc",
"add w22, w9, w22",
"ldr w4, [x22]",
"add w22, w4, #0x3c (60)",
"ldr s2, [x22]",
"ldr w4, [x9, w22, sxtw]",
"ldr s2, [x4, #60]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -1532,8 +1470,7 @@
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"mov w21, #0xfffffffc",
"add w21, w9, w21",
"str s2, [x21]",
"str s2, [x9, w21, sxtw]",
"ldrb w21, [x28, #1026]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
@ -1541,11 +1478,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w23, w9, #0x8 (8)",
"ldr w5, [x23]",
"ldr w5, [x9, #8]",
"mov w23, #0xffffffc0",
"add w23, w9, w23",
"ldr s2, [x23]",
"ldr s2, [x9, w23, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -1618,11 +1553,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w23, w9, #0x8 (8)",
"ldr w6, [x23]",
"ldr w6, [x9, #8]",
"mov w23, #0xffffffc4",
"add w23, w9, w23",
"ldr s2, [x23]",
"ldr s2, [x9, w23, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -1687,8 +1620,7 @@
"ldr x16, [sp], #16",
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"add w21, w6, #0x4 (4)",
"str s2, [x21]",
"str s2, [x6, #4]",
"ldrb w21, [x28, #1026]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
@ -1696,11 +1628,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w23, w9, #0x8 (8)",
"ldr w4, [x23]",
"ldr w4, [x9, #8]",
"mov w23, #0xffffffc8",
"add w23, w9, w23",
"ldr s2, [x23]",
"ldr s2, [x9, w23, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -1765,8 +1695,7 @@
"ldr x16, [sp], #16",
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"add w21, w4, #0x8 (8)",
"str s2, [x21]",
"str s2, [x4, #8]",
"ldrb w21, [x28, #1026]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
@ -1774,11 +1703,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w23, w9, #0x8 (8)",
"ldr w5, [x23]",
"ldr w5, [x9, #8]",
"mov w23, #0xffffffcc",
"add w23, w9, w23",
"ldr s2, [x23]",
"ldr s2, [x9, w23, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -1843,8 +1770,7 @@
"ldr x16, [sp], #16",
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"add w21, w5, #0xc (12)",
"str s2, [x21]",
"str s2, [x5, #12]",
"ldrb w21, [x28, #1026]",
"lsl w22, w22, w20",
"bic w21, w21, w22",
@ -1852,11 +1778,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w22, w9, #0x8 (8)",
"ldr w6, [x22]",
"ldr w6, [x9, #8]",
"mov w22, #0xffffffd0",
"add w22, w9, w22",
"ldr s2, [x22]",
"ldr s2, [x9, w22, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -1922,8 +1846,7 @@
"ldr x16, [sp], #16",
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"add w21, w6, #0x10 (16)",
"str s2, [x21]",
"str s2, [x6, #16]",
"ldrb w21, [x28, #1026]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
@ -1931,11 +1854,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w23, w9, #0x8 (8)",
"ldr w4, [x23]",
"ldr w4, [x9, #8]",
"mov w23, #0xffffffd4",
"add w23, w9, w23",
"ldr s2, [x23]",
"ldr s2, [x9, w23, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -2000,8 +1921,7 @@
"ldr x16, [sp], #16",
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"add w21, w4, #0x14 (20)",
"str s2, [x21]",
"str s2, [x4, #20]",
"ldrb w21, [x28, #1026]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
@ -2009,11 +1929,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w23, w9, #0x8 (8)",
"ldr w5, [x23]",
"ldr w5, [x9, #8]",
"mov w23, #0xffffffd8",
"add w23, w9, w23",
"ldr s2, [x23]",
"ldr s2, [x9, w23, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -2078,8 +1996,7 @@
"ldr x16, [sp], #16",
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"add w21, w5, #0x18 (24)",
"str s2, [x21]",
"str s2, [x5, #24]",
"ldrb w21, [x28, #1026]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
@ -2087,11 +2004,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w23, w9, #0x8 (8)",
"ldr w6, [x23]",
"ldr w6, [x9, #8]",
"mov w23, #0xffffffdc",
"add w23, w9, w23",
"ldr s2, [x23]",
"ldr s2, [x9, w23, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -2156,8 +2071,7 @@
"ldr x16, [sp], #16",
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"add w21, w6, #0x1c (28)",
"str s2, [x21]",
"str s2, [x6, #28]",
"ldrb w21, [x28, #1026]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
@ -2165,11 +2079,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w23, w9, #0x8 (8)",
"ldr w4, [x23]",
"ldr w4, [x9, #8]",
"mov w23, #0xffffffe0",
"add w23, w9, w23",
"ldr s2, [x23]",
"ldr s2, [x9, w23, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -2234,8 +2146,7 @@
"ldr x16, [sp], #16",
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"add w21, w4, #0x20 (32)",
"str s2, [x21]",
"str s2, [x4, #32]",
"ldrb w21, [x28, #1026]",
"lsl w22, w22, w20",
"bic w21, w21, w22",
@ -2243,11 +2154,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w22, w9, #0x8 (8)",
"ldr w5, [x22]",
"ldr w5, [x9, #8]",
"mov w22, #0xffffffe4",
"add w22, w9, w22",
"ldr s2, [x22]",
"ldr s2, [x9, w22, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -2313,8 +2222,7 @@
"ldr x16, [sp], #16",
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"add w21, w5, #0x24 (36)",
"str s2, [x21]",
"str s2, [x5, #36]",
"ldrb w21, [x28, #1026]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
@ -2322,11 +2230,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w23, w9, #0x8 (8)",
"ldr w6, [x23]",
"ldr w6, [x9, #8]",
"mov w23, #0xffffffe8",
"add w23, w9, w23",
"ldr s2, [x23]",
"ldr s2, [x9, w23, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -2391,8 +2297,7 @@
"ldr x16, [sp], #16",
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"add w21, w6, #0x28 (40)",
"str s2, [x21]",
"str s2, [x6, #40]",
"ldrb w21, [x28, #1026]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
@ -2400,11 +2305,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w23, w9, #0x8 (8)",
"ldr w4, [x23]",
"ldr w4, [x9, #8]",
"mov w23, #0xffffffec",
"add w23, w9, w23",
"ldr s2, [x23]",
"ldr s2, [x9, w23, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -2469,8 +2372,7 @@
"ldr x16, [sp], #16",
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"add w21, w4, #0x2c (44)",
"str s2, [x21]",
"str s2, [x4, #44]",
"ldrb w21, [x28, #1026]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
@ -2478,11 +2380,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w23, w9, #0x8 (8)",
"ldr w5, [x23]",
"ldr w5, [x9, #8]",
"mov w23, #0xfffffff0",
"add w23, w9, w23",
"ldr s2, [x23]",
"ldr s2, [x9, w23, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -2547,8 +2447,7 @@
"ldr x16, [sp], #16",
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"add w21, w5, #0x30 (48)",
"str s2, [x21]",
"str s2, [x5, #48]",
"ldrb w21, [x28, #1026]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
@ -2556,11 +2455,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w23, w9, #0x8 (8)",
"ldr w6, [x23]",
"ldr w6, [x9, #8]",
"mov w23, #0xfffffff4",
"add w23, w9, w23",
"ldr s2, [x23]",
"ldr s2, [x9, w23, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -2625,8 +2522,7 @@
"ldr x16, [sp], #16",
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"add w21, w6, #0x34 (52)",
"str s2, [x21]",
"str s2, [x6, #52]",
"ldrb w21, [x28, #1026]",
"lsl w22, w22, w20",
"bic w21, w21, w22",
@ -2634,11 +2530,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w22, w9, #0x8 (8)",
"ldr w4, [x22]",
"ldr w4, [x9, #8]",
"mov w22, #0xfffffff8",
"add w22, w9, w22",
"ldr s2, [x22]",
"ldr s2, [x9, w22, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -2704,8 +2598,7 @@
"ldr x16, [sp], #16",
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"add w21, w4, #0x38 (56)",
"str s2, [x21]",
"str s2, [x4, #56]",
"ldrb w21, [x28, #1026]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
@ -2713,11 +2606,9 @@
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"add w23, w9, #0x8 (8)",
"ldr w5, [x23]",
"ldr w5, [x9, #8]",
"mov w23, #0xfffffffc",
"add w23, w9, w23",
"ldr s2, [x23]",
"ldr s2, [x9, w23, sxtw]",
"mrs x0, nzcv",
"str w0, [x28, #728]",
"stp x4, x5, [x28, #8]",
@ -2782,8 +2673,7 @@
"ldr x16, [sp], #16",
"ldp x17, x30, [sp], #16",
"fmov s2, s0",
"add w21, w5, #0x3c (60)",
"str s2, [x21]",
"str s2, [x5, #60]",
"ldrb w21, [x28, #1026]",
"lsl w22, w22, w20",
"bic w21, w21, w22",
@ -2792,8 +2682,7 @@
"and w20, w20, #0x7",
"strb w20, [x28, #747]",
"mov w8, w9",
"add w20, w8, #0x8 (8)",
"ldr w4, [x20]",
"ldr w4, [x8, #8]",
"ldr w9, [x8]",
"add x8, x8, #0x4 (4)"
]

View File

@ -380,18 +380,18 @@
"ExpectedArm64ASM": [
"ldr w11, [x8]",
"add x20, x8, #0x4 (4)",
"ldr w10, [x8, #4]",
"add x21, x20, #0x4 (4)",
"ldr w9, [x20, #4]",
"add x20, x21, #0x8 (8)",
"ldr w7, [x21, #8]",
"add x21, x20, #0x4 (4)",
"ldr w6, [x20, #4]",
"add x20, x21, #0x4 (4)",
"ldr w5, [x21, #4]",
"add x21, x20, #0x4 (4)",
"ldr w4, [x20, #4]",
"add x8, x21, #0x4 (4)"
"ldr w10, [x20]",
"add x20, x20, #0x4 (4)",
"ldr w9, [x20]",
"add x20, x20, #0x8 (8)",
"ldr w7, [x20]",
"add x20, x20, #0x4 (4)",
"ldr w6, [x20]",
"add x20, x20, #0x4 (4)",
"ldr w5, [x20]",
"add x20, x20, #0x4 (4)",
"ldr w4, [x20]",
"add x8, x20, #0x4 (4)"
]
},
"popad": {
@ -400,18 +400,18 @@
"ExpectedArm64ASM": [
"ldr w11, [x8]",
"add x20, x8, #0x4 (4)",
"ldr w10, [x8, #4]",
"add x21, x20, #0x4 (4)",
"ldr w9, [x20, #4]",
"add x20, x21, #0x8 (8)",
"ldr w7, [x21, #8]",
"add x21, x20, #0x4 (4)",
"ldr w6, [x20, #4]",
"add x20, x21, #0x4 (4)",
"ldr w5, [x21, #4]",
"add x21, x20, #0x4 (4)",
"ldr w4, [x20, #4]",
"add x8, x21, #0x4 (4)"
"ldr w10, [x20]",
"add x20, x20, #0x4 (4)",
"ldr w9, [x20]",
"add x20, x20, #0x8 (8)",
"ldr w7, [x20]",
"add x20, x20, #0x4 (4)",
"ldr w6, [x20]",
"add x20, x20, #0x4 (4)",
"ldr w5, [x20]",
"add x20, x20, #0x4 (4)",
"ldr w4, [x20]",
"add x8, x20, #0x4 (4)"
]
},
"aam": {