From d495ad5104ad70c590308f7f262f52511f7a494e Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Mon, 31 Aug 2015 14:03:32 -0500
Subject: [PATCH 1/2] [AArch64] Make TST reg, reg emitter alias

---
 Source/Core/Common/Arm64Emitter.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index f6cb834a5d..8ca8c001a0 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -562,6 +562,10 @@ public:
 	void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
 	void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
 	void TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
+	void TST(ARM64Reg Rn, ARM64Reg Rm)
+	{
+		ANDS(Is64Bit(Rn) ? ZR : WZR, Rn, Rm); // Destination is the zero register: only the flags are kept
+	}
 
 	// Add/subtract (immediate)
 	void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);

From ae0a06a018ce8eb582d3841829b50226619f3302 Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Mon, 31 Aug 2015 14:03:57 -0500
Subject: [PATCH 2/2] [AArch64] Implement dcbz instruction

---
 Source/Core/Core/PowerPC/JitArm64/Jit.h       |  1 +
 .../PowerPC/JitArm64/JitArm64_BackPatch.cpp   | 12 +++
 .../PowerPC/JitArm64/JitArm64_LoadStore.cpp   | 75 +++++++++++++++++++
 .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp |  2 +-
 .../Core/PowerPC/JitArmCommon/BackPatch.h     |  3 +
 5 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index 881626491b..c02b196082 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -129,6 +129,7 @@ public:
 	void lmw(UGeckoInstruction inst);
 	void stmw(UGeckoInstruction inst);
 	void dcbt(UGeckoInstruction inst);
+	void dcbz(UGeckoInstruction inst);
 
 	// LoadStore floating point
 	void lfXX(UGeckoInstruction inst);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
index 020e72fa0b..37420a2cf0 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -100,6 +100,13 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
 			else
 				STRB(RS, X28, addr);
 		}
+		else if (flags & BackPatchInfo::FLAG_ZERO_256)
+		{
+			// This literally only stores 32 bytes of zeros to the target address
+			ADD(addr, addr, X28);
+			STP(INDEX_SIGNED, ZR, ZR, addr, 0);
+			STP(INDEX_SIGNED, ZR, ZR, addr, 16);
+		}
 		else
 		{
 			if (flags & BackPatchInfo::FLAG_SIZE_32)
@@ -212,6 +219,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
 
 			BLR(X30);
 		}
+		else if (flags & BackPatchInfo::FLAG_ZERO_256)
+		{
+			MOVI2R(X30, (u64)&PowerPC::ClearCacheLine);
+			BLR(X30);
+		}
 		else
 		{
 			if (flags & BackPatchInfo::FLAG_SIZE_32)
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
index 6894498f92..52cf6ce94f 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -663,3 +663,78 @@ void JitArm64::dcbt(UGeckoInstruction inst)
 		js.skipInstructions = 1;
 	}
 }
+
+// dcbz: zero the 32-byte cache line that contains the effective address (RA|0) + RB.
+void JitArm64::dcbz(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITLoadStoreOff);
+
+	int a = inst.RA, b = inst.RB;
+
+	gpr.Lock(W0);
+
+	ARM64Reg addr_reg = W0;
+
+	if (a)
+	{
+		bool is_imm_a, is_imm_b;
+		is_imm_a = gpr.IsImm(a);
+		is_imm_b = gpr.IsImm(b);
+		if (is_imm_a && is_imm_b)
+		{
+			// full imm_addr
+			u32 imm_addr = gpr.GetImm(b) + gpr.GetImm(a);
+			MOVI2R(addr_reg, imm_addr);
+		}
+		else if (is_imm_a || is_imm_b)
+		{
+			// Only one register is an immediate
+			ARM64Reg base = is_imm_a ? gpr.R(b) : gpr.R(a);
+			u32 imm_offset = is_imm_a ? gpr.GetImm(a) : gpr.GetImm(b);
+			if (imm_offset < 4096)
+			{
+				ADD(addr_reg, base, imm_offset);
+			}
+			else
+			{
+				MOVI2R(addr_reg, imm_offset);
+				ADD(addr_reg, addr_reg, base);
+			}
+		}
+		else
+		{
+			// Both are registers
+			ADD(addr_reg, gpr.R(a), gpr.R(b));
+		}
+	}
+	else
+	{
+		// RA isn't used, only RB
+		if (gpr.IsImm(b))
+		{
+			u32 imm_addr = gpr.GetImm(b);
+			MOVI2R(addr_reg, imm_addr);
+		}
+		else
+		{
+			MOV(addr_reg, gpr.R(b));
+		}
+	}
+
+	// dcbz clears the whole cache line, so round the effective address down
+	// to a 32-byte boundary (immr = 27, imms = 26 encodes the 32-bit mask ~31).
+	AND(addr_reg, addr_reg, 27, 26);
+
+	// We don't care about being /too/ terribly efficient here
+	// As long as we aren't falling back to interpreter we're winning a lot
+
+	BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
+	BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
+	gprs_to_push[W0] = 0;
+
+	EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, true, true, W0, EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
+
+	gpr.Unlock(W0);
+
+}
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
index 8e0432658e..ada18aca26 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
@@ -225,7 +225,7 @@ static GekkoOPTemplate table31[] =
 	{278, &JitArm64::dcbt}, // dcbt
 	{470, &JitArm64::FallBackToInterpreter}, // dcbi
 	{758, &JitArm64::DoNothing}, // dcba
-	{1014, &JitArm64::FallBackToInterpreter}, // dcbz
+	{1014, &JitArm64::dcbz}, // dcbz
 
 	//load word
 	{23, &JitArm64::lXX}, // lwzx
diff --git a/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h b/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h
index d3b6f46c31..b7f68b0b23 100644
--- a/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h
+++ b/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h
@@ -19,6 +19,7 @@ struct BackPatchInfo
 		FLAG_REVERSE = (1 << 7),
 		FLAG_EXTEND = (1 << 8),
 		FLAG_SIZE_F32I = (1 << 9),
+		FLAG_ZERO_256 = (1 << 10),
 	};
 
 	static u32 GetFlagSize(u32 flags)
@@ -33,6 +34,8 @@ struct BackPatchInfo
 			return 32;
 		if (flags & FLAG_SIZE_F64)
 			return 64;
+		if (flags & FLAG_ZERO_256)
+			return 256;
 		return 0;
 	}
 };