mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 05:19:56 +00:00
Merge pull request #9241 from hrydgard/arm64-abi
Follow the ARM64 ABI better (update the frame pointer).
This commit is contained in:
commit
f28fec3fa5
@ -1913,17 +1913,25 @@ inline int64_t abs64(int64_t x) {
|
||||
return x >= 0 ? x : -x;
|
||||
}
|
||||
|
||||
// Returns how many of the four flags in `part` are set.
// The array is only read, so it is taken as const; callers that pass a plain
// bool[4] keep working, and const arrays are now accepted as well.
int Count(const bool part[4]) {
	int cnt = 0;
	for (int i = 0; i < 4; i++) {
		if (part[i])
			cnt++;
	}
	return cnt;
}
|
||||
|
||||
// Wrapper around MOVZ+MOVK (and later MOVN)
|
||||
void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
|
||||
{
|
||||
unsigned int parts = Is64Bit(Rd) ? 4 : 2;
|
||||
BitSet32 upload_part(0);
|
||||
bool upload_part[4];
|
||||
|
||||
// Always start with a movz! Kills the dependency on the register.
|
||||
bool use_movz = true;
|
||||
|
||||
if (!imm)
|
||||
{
|
||||
if (!imm) {
|
||||
// Zero immediate, just clear the register. EOR is pointless when we have MOVZ, which looks clearer in disasm too.
|
||||
MOVZ(Rd, 0, SHIFT_0);
|
||||
return;
|
||||
@ -1961,7 +1969,7 @@ void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
|
||||
|
||||
u64 aligned_pc = (u64)GetCodePointer() & ~0xFFF;
|
||||
s64 aligned_offset = (s64)imm - (s64)aligned_pc;
|
||||
if (upload_part.Count() > 1 && abs64(aligned_offset) < 0xFFFFFFFFLL)
|
||||
if (Count(upload_part) > 1 && abs64(aligned_offset) < 0xFFFFFFFFLL)
|
||||
{
|
||||
// Immediate we are loading is within 4GB of our aligned range
|
||||
// Most likely an address that we can load in one or two instructions
|
||||
@ -2015,115 +2023,11 @@ void ARM64XEmitter::POP(ARM64Reg Rd) {
|
||||
// Emits a single STP of Rd and Rn using pre-indexed addressing on SP with an offset of -16.
void ARM64XEmitter::PUSH2(ARM64Reg Rd, ARM64Reg Rn) {
|
||||
STP(INDEX_PRE, Rd, Rn, SP, -16);
|
||||
}
|
||||
|
||||
// Emits a single LDP into Rd and Rn using post-indexed addressing on SP with an offset of 16.
void ARM64XEmitter::POP2(ARM64Reg Rd, ARM64Reg Rn) {
|
||||
LDP(INDEX_POST, Rd, Rn, SP, 16);
|
||||
}
|
||||
|
||||
|
||||
// Emits stores that save every register selected in `registers` (bit i -> X0 + i).
//  - Even count: each completed pair gets its own pre-indexed STP with an offset of -16.
//  - Odd count: the first register is stored with a pre-indexed STR that moves SP by the
//    whole 16-byte-rounded frame; the remaining pairs go to fixed offsets 16, 32, ...
void ARM64XEmitter::ABI_PushRegisters(BitSet32 registers)
{
	const int total = registers.Count();

	// Two-slot buffer collecting registers until a full pair can be stored.
	ARM64Reg pair[2] = {};
	int filled = 0;

	if ((total % 2) == 0)
	{
		for (auto bit : registers)
		{
			pair[filled++] = (ARM64Reg)(X0 + bit);
			if (filled < 2)
				continue;

			STP(INDEX_PRE, pair[0], pair[1], SP, -16);
			filled = 0;
		}
		return;
	}

	// Stack is required to be quad-word aligned.
	const u32 frame_bytes = ROUND_UP(total * 8, 16);
	u32 slot_offset = 0;
	bool frame_reserved = false;

	for (auto bit : registers)
	{
		const ARM64Reg reg = (ARM64Reg)(X0 + bit);

		if (!frame_reserved)
		{
			// The first store also reserves the full frame; it occupies a 16-byte slot on its own.
			STR(INDEX_PRE, reg, SP, -(s32)frame_bytes);
			frame_reserved = true;
			slot_offset += 16;
			continue;
		}

		pair[filled++] = reg;
		if (filled < 2)
			continue;

		STP(INDEX_UNSIGNED, pair[0], pair[1], SP, slot_offset);
		slot_offset += 16;
		filled = 0;
	}
}
|
||||
|
||||
// Emits loads that restore the registers selected in `registers` (bit i -> X0 + i).
// Registers that are also set in `ignore_mask` are loaded into (X0 + WSP) instead of
// their own register. NOTE(review): presumably that resolves to the zero register so
// the stack slot is simply discarded - confirm against the ARM64Reg enum.
//  - Even count: walks from bit 31 down to bit 0 and loads each pair as (lower, higher)
//    with a post-indexed LDP of 16.
//  - Odd count: walks upwards; the first register gets a post-indexed LDR of 16, the
//    rest are loaded pairwise with post-indexed LDPs of 16.
void ARM64XEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask)
{
	// Two-slot buffer collecting registers until a full pair can be loaded.
	ARM64Reg pair[2] = {};
	int filled = 0;

	if ((registers.Count() % 2) == 0)
	{
		for (int idx = 31; idx >= 0; --idx)
		{
			if (!registers[idx])
				continue;

			const int target = ignore_mask[idx] ? (int)WSP : idx;
			pair[filled++] = (ARM64Reg)(X0 + target);
			if (filled < 2)
				continue;

			// Collected in descending order, so the second entry is the lower register.
			LDP(INDEX_POST, pair[1], pair[0], SP, 16);
			filled = 0;
		}
		return;
	}

	bool single_loaded = false;
	for (auto bit : registers)
	{
		const int target = ignore_mask[bit] ? (int)WSP : bit;
		const ARM64Reg reg = (ARM64Reg)(X0 + target);

		if (!single_loaded)
		{
			LDR(INDEX_POST, reg, SP, 16);
			single_loaded = true;
			continue;
		}

		pair[filled++] = reg;
		if (filled < 2)
			continue;

		LDP(INDEX_POST, pair[0], pair[1], SP, 16);
		filled = 0;
	}
}
|
||||
|
||||
// Float Emitter
|
||||
void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
|
||||
{
|
||||
@ -3658,161 +3562,95 @@ void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8
|
||||
EmitVectorxElement(0, 2 | (size >> 6), L, 1, H, Rd, Rn, Rm);
|
||||
}
|
||||
|
||||
void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp)
|
||||
{
|
||||
bool bundled_loadstore = false;
|
||||
void ARM64FloatEmitter::ABI_PushRegisters(uint32_t registers, uint32_t fp_registers) {
|
||||
_assert_msg_(DYNA_REC, (registers & 0x60000000) == 0, "ABI_PushRegisters: Do not include FP and LR, those are handled non-conditionally");
|
||||
|
||||
for (int i = 0; i < 32; ++i)
|
||||
{
|
||||
if (!registers[i])
|
||||
continue;
|
||||
|
||||
int count = 0;
|
||||
while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
|
||||
if (count > 1)
|
||||
{
|
||||
bundled_loadstore = true;
|
||||
break;
|
||||
}
|
||||
ARM64Reg gprs[32]{}, fprs[32]{};
|
||||
int num_gprs = 0, num_fprs = 0;
|
||||
for (int i = 0; i < 29; i++) {
|
||||
if (registers & (1U << i))
|
||||
gprs[num_gprs++] = (ARM64Reg)(X0 + i);
|
||||
}
|
||||
|
||||
if (bundled_loadstore && tmp != INVALID_REG)
|
||||
{
|
||||
int num_regs = registers.Count();
|
||||
m_emit->SUB(SP, SP, num_regs * 16);
|
||||
m_emit->ADD(tmp, SP, 0);
|
||||
std::vector<ARM64Reg> island_regs;
|
||||
for (int i = 0; i < 32; ++i)
|
||||
{
|
||||
if (!registers[i])
|
||||
continue;
|
||||
|
||||
int count = 0;
|
||||
|
||||
// 0 = true
|
||||
// 1 < 4 && registers[i + 1] true!
|
||||
// 2 < 4 && registers[i + 2] true!
|
||||
// 3 < 4 && registers[i + 3] true!
|
||||
// 4 < 4 && registers[i + 4] false!
|
||||
while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
|
||||
|
||||
if (count == 1)
|
||||
island_regs.push_back((ARM64Reg)(Q0 + i));
|
||||
else
|
||||
ST1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), tmp);
|
||||
|
||||
i += count - 1;
|
||||
}
|
||||
|
||||
// Handle island registers
|
||||
std::vector<ARM64Reg> pair_regs;
|
||||
for (auto& it : island_regs)
|
||||
{
|
||||
pair_regs.push_back(it);
|
||||
if (pair_regs.size() == 2)
|
||||
{
|
||||
STP(128, INDEX_POST, pair_regs[0], pair_regs[1], tmp, 32);
|
||||
pair_regs.clear();
|
||||
}
|
||||
}
|
||||
if (pair_regs.size())
|
||||
STR(128, INDEX_POST, pair_regs[0], tmp, 16);
|
||||
for (int i = 0; i < 32; i++) {
|
||||
if (fp_registers & (1U << i))
|
||||
fprs[num_fprs++] = (ARM64Reg)(D0 + i);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<ARM64Reg> pair_regs;
|
||||
for (auto it : registers)
|
||||
{
|
||||
pair_regs.push_back((ARM64Reg)(Q0 + it));
|
||||
if (pair_regs.size() == 2)
|
||||
{
|
||||
STP(128, INDEX_PRE, pair_regs[0], pair_regs[1], SP, -32);
|
||||
pair_regs.clear();
|
||||
}
|
||||
}
|
||||
if (pair_regs.size())
|
||||
STR(128, INDEX_PRE, pair_regs[0], SP, -16);
|
||||
|
||||
u32 stack_size = 16 + ROUND_UP(num_gprs * 8, 16) + ROUND_UP(num_fprs * 8, 16);
|
||||
|
||||
// Stack is required to be quad-word aligned.
|
||||
if (stack_size < 256) {
|
||||
m_emit->STP(INDEX_PRE, FP, LR, SP, -(s32)stack_size);
|
||||
} else {
|
||||
m_emit->SUB(SP, SP, stack_size);
|
||||
m_emit->STP(INDEX_UNSIGNED, FP, LR, SP, 0);
|
||||
}
|
||||
m_emit->MOVfromSP(X29); // Set new frame pointer
|
||||
int offset = 16;
|
||||
for (int i = 0; i < num_gprs / 2; i++) {
|
||||
m_emit->STP(INDEX_SIGNED, gprs[i*2], gprs[i*2+1], X29, offset);
|
||||
offset += 16;
|
||||
}
|
||||
if (num_gprs & 1) {
|
||||
m_emit->STR(INDEX_UNSIGNED, gprs[num_gprs - 1], X29, offset);
|
||||
offset += 16;
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_fprs / 2; i++) {
|
||||
STP(64, INDEX_SIGNED, fprs[i * 2], fprs[i * 2 + 1], SP, offset);
|
||||
offset += 16;
|
||||
}
|
||||
if (num_fprs & 1) {
|
||||
STR(64, INDEX_UNSIGNED, fprs[num_fprs - 1], X29, offset);
|
||||
offset += 16;
|
||||
}
|
||||
// Now offset should be == stack_size.
|
||||
}
|
||||
void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp)
|
||||
{
|
||||
bool bundled_loadstore = false;
|
||||
int num_regs = registers.Count();
|
||||
|
||||
for (int i = 0; i < 32; ++i)
|
||||
{
|
||||
if (!registers[i])
|
||||
continue;
|
||||
|
||||
int count = 0;
|
||||
while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
|
||||
if (count > 1)
|
||||
{
|
||||
bundled_loadstore = true;
|
||||
break;
|
||||
}
|
||||
void ARM64FloatEmitter::ABI_PopRegisters(uint32_t registers, uint32_t fp_registers) {
|
||||
ARM64Reg gprs[32]{}, fprs[32]{};
|
||||
int num_gprs = 0, num_fprs = 0;
|
||||
for (int i = 0; i < 29; i++) {
|
||||
if (registers & (1U << i))
|
||||
gprs[num_gprs++] = (ARM64Reg)(X0 + i);
|
||||
}
|
||||
|
||||
if (bundled_loadstore && tmp != INVALID_REG)
|
||||
{
|
||||
// The temporary register is only used to indicate that we can use this code path
|
||||
std::vector<ARM64Reg> island_regs;
|
||||
for (int i = 0; i < 32; ++i)
|
||||
{
|
||||
if (!registers[i])
|
||||
continue;
|
||||
|
||||
int count = 0;
|
||||
while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
|
||||
|
||||
if (count == 1)
|
||||
island_regs.push_back((ARM64Reg)(Q0 + i));
|
||||
else
|
||||
LD1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), SP);
|
||||
|
||||
i += count - 1;
|
||||
}
|
||||
|
||||
// Handle island registers
|
||||
std::vector<ARM64Reg> pair_regs;
|
||||
for (auto& it : island_regs)
|
||||
{
|
||||
pair_regs.push_back(it);
|
||||
if (pair_regs.size() == 2)
|
||||
{
|
||||
LDP(128, INDEX_POST, pair_regs[0], pair_regs[1], SP, 32);
|
||||
pair_regs.clear();
|
||||
}
|
||||
}
|
||||
if (pair_regs.size())
|
||||
LDR(128, INDEX_POST, pair_regs[0], SP, 16);
|
||||
for (int i = 0; i < 32; i++) {
|
||||
if (fp_registers & (1U << i))
|
||||
fprs[num_fprs++] = (ARM64Reg)(D0 + i);
|
||||
}
|
||||
else
|
||||
{
|
||||
bool odd = (num_regs % 2) != 0;
|
||||
std::vector<ARM64Reg> pair_regs;
|
||||
for (int i = 31; i >= 0; --i)
|
||||
{
|
||||
if (!registers[i])
|
||||
continue;
|
||||
|
||||
if (odd)
|
||||
{
|
||||
// First load must be a regular LDR if odd
|
||||
odd = false;
|
||||
LDR(128, INDEX_POST, (ARM64Reg)(Q0 + i), SP, 16);
|
||||
}
|
||||
else
|
||||
{
|
||||
pair_regs.push_back((ARM64Reg)(Q0 + i));
|
||||
if (pair_regs.size() == 2)
|
||||
{
|
||||
LDP(128, INDEX_POST, pair_regs[1], pair_regs[0], SP, 32);
|
||||
pair_regs.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
u32 stack_size = 16 + ROUND_UP(num_gprs * 8, 16) + ROUND_UP(num_fprs * 8, 16);
|
||||
|
||||
// SP points to the bottom. We're gonna walk it upwards.
|
||||
// Reload FP, LR.
|
||||
m_emit->LDP(INDEX_SIGNED, FP, LR, SP, 0);
|
||||
int offset = 16;
|
||||
for (int i = 0; i < num_gprs / 2; i++) {
|
||||
m_emit->LDP(INDEX_SIGNED, gprs[i*2], gprs[i*2+1], SP, offset);
|
||||
offset += 16;
|
||||
}
|
||||
// Do the straggler.
|
||||
if (num_gprs & 1) {
|
||||
m_emit->LDR(INDEX_UNSIGNED, gprs[num_gprs-1], SP, offset);
|
||||
offset += 16;
|
||||
}
|
||||
|
||||
// Time for the FP regs.
|
||||
for (int i = 0; i < num_fprs / 2; i++) {
|
||||
LDP(64, INDEX_SIGNED, fprs[i * 2], fprs[i * 2 + 1], SP, offset);
|
||||
offset += 16;
|
||||
}
|
||||
// Do the straggler.
|
||||
if (num_fprs & 1) {
|
||||
LDR(64, INDEX_UNSIGNED, fprs[num_fprs-1], SP, offset);
|
||||
offset += 16;
|
||||
}
|
||||
// Now offset should be == stack_size.
|
||||
|
||||
// Restore the stack pointer.
|
||||
m_emit->ADD(SP, SP, stack_size);
|
||||
}
|
||||
|
||||
void ARM64XEmitter::ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
|
||||
|
@ -85,12 +85,14 @@ enum ARM64Reg
|
||||
|
||||
WZR = WSP,
|
||||
ZR = SP,
|
||||
FP = X29,
|
||||
LR = X30,
|
||||
|
||||
INVALID_REG = 0xFFFFFFFF
|
||||
};
|
||||
|
||||
// R19-R28, R29 (FP), R30 (LR). FP seems questionable?
|
||||
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
|
||||
// R19-R28. R29 (FP), R30 (LR) are always saved and FP updated appropriately.
|
||||
const u32 ALL_CALLEE_SAVED = 0x1FF80000;
|
||||
const u32 ALL_CALLEE_SAVED_FP = 0x0000FF00; // d8-d15
|
||||
|
||||
// Returns true when bit 0x20 of the register value is set.
// NOTE(review): presumably the ARM64Reg enum marks 64-bit (X) registers with that bit - confirm against the enum definition.
inline bool Is64Bit(ARM64Reg reg) { return (reg & 0x20) != 0; }
|
||||
@ -720,10 +722,6 @@ public:
|
||||
bool TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
|
||||
bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
|
||||
|
||||
// ABI related
|
||||
void ABI_PushRegisters(BitSet32 registers);
|
||||
void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0));
|
||||
|
||||
// Pseudo-instruction for convenience. PUSH pushes 16 bytes even though we only push a single register.
|
||||
// This is so the stack pointer is always 16-byte aligned, which is checked by hardware!
|
||||
void PUSH(ARM64Reg Rd);
|
||||
@ -943,8 +941,8 @@ public:
|
||||
void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG);
|
||||
|
||||
// ABI related
|
||||
void ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG);
|
||||
void ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG);
|
||||
void ABI_PushRegisters(uint32_t gpr_registers, uint32_t fp_registers);
|
||||
void ABI_PopRegisters(uint32_t gpr_registers, uint32_t fp_registers);
|
||||
|
||||
private:
|
||||
ARM64XEmitter* m_emit;
|
||||
|
143
Common/BitSet.h
143
Common/BitSet.h
@ -3,8 +3,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <initializer_list>
|
||||
#include <type_traits>
|
||||
#include "CommonTypes.h"
|
||||
|
||||
// Helper functions:
|
||||
@ -12,8 +10,7 @@
|
||||
#ifdef _WIN32
|
||||
#include <intrin.h>
|
||||
template <typename T>
|
||||
static inline int CountSetBits(T v)
|
||||
{
|
||||
inline int CountSetBits(T v) {
|
||||
// from https://graphics.stanford.edu/~seander/bithacks.html
|
||||
// GCC has this built in, but MSVC's intrinsic will only emit the actual
|
||||
// POPCNT instruction, which we're not depending on
|
||||
@ -22,14 +19,14 @@ static inline int CountSetBits(T v)
|
||||
v = (v + (v >> 4)) & (T)~(T)0/255*15;
|
||||
return (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * 8;
|
||||
}
|
||||
static inline int LeastSignificantSetBit(u32 val)
|
||||
inline int LeastSignificantSetBit(u32 val)
|
||||
{
|
||||
unsigned long index;
|
||||
_BitScanForward(&index, val);
|
||||
return (int)index;
|
||||
}
|
||||
#ifdef _M_X64
|
||||
static inline int LeastSignificantSetBit(u64 val)
|
||||
inline int LeastSignificantSetBit(u64 val)
|
||||
{
|
||||
unsigned long index;
|
||||
_BitScanForward64(&index, val);
|
||||
@ -37,134 +34,8 @@ static inline int LeastSignificantSetBit(u64 val)
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
static inline int CountSetBits(u32 val) { return __builtin_popcount(val); }
|
||||
static inline int CountSetBits(u64 val) { return __builtin_popcountll(val); }
|
||||
static inline int LeastSignificantSetBit(u32 val) { return __builtin_ctz(val); }
|
||||
static inline int LeastSignificantSetBit(u64 val) { return __builtin_ctzll(val); }
|
||||
inline int CountSetBits(u32 val) { return __builtin_popcount(val); }
|
||||
inline int CountSetBits(u64 val) { return __builtin_popcountll(val); }
|
||||
inline int LeastSignificantSetBit(u32 val) { return __builtin_ctz(val); }
|
||||
inline int LeastSignificantSetBit(u64 val) { return __builtin_ctzll(val); }
|
||||
#endif
|
||||
|
||||
// namespace avoids conflict with OS X Carbon; don't use BitSet<T> directly
|
||||
namespace BS
|
||||
{
|
||||
|
||||
// Similar to std::bitset, this is a class which encapsulates a bitset, i.e.
|
||||
// using the set bits of an integer to represent a set of integers. Like that
|
||||
// class, it acts like an array of bools:
|
||||
// BitSet32 bs;
|
||||
// bs[1] = true;
|
||||
// but also like the underlying integer ([0] = least significant bit):
|
||||
// BitSet32 bs2 = ...;
|
||||
// bs = (bs ^ bs2) & BitSet32(0xffff);
|
||||
// The following additional functionality is provided:
|
||||
// - Construction using an initializer list.
|
||||
// BitSet bs { 1, 2, 4, 8 };
|
||||
// - Efficiently iterating through the set bits:
|
||||
// for (int i : bs)
|
||||
// [i is the *index* of a set bit]
|
||||
// (This uses the appropriate CPU instruction to find the next set bit in one
|
||||
// operation.)
|
||||
// - Counting set bits using .Count() - see comment on that method.
|
||||
|
||||
// TODO: use constexpr when MSVC gets out of the Dark Ages
|
||||
|
||||
template <typename IntTy>
|
||||
class BitSet
|
||||
{
|
||||
static_assert(!std::is_signed<IntTy>::value, "BitSet should not be used with signed types");
|
||||
public:
|
||||
// A reference to a particular bit, returned from operator[].
|
||||
class Ref
|
||||
{
|
||||
public:
|
||||
Ref(Ref&& other) : m_bs(other.m_bs), m_mask(other.m_mask) {}
|
||||
Ref(BitSet* bs, IntTy mask) : m_bs(bs), m_mask(mask) {}
|
||||
operator bool() const { return (m_bs->m_val & m_mask) != 0; }
|
||||
bool operator=(bool set)
|
||||
{
|
||||
m_bs->m_val = (m_bs->m_val & ~m_mask) | (set ? m_mask : 0);
|
||||
return set;
|
||||
}
|
||||
private:
|
||||
BitSet* m_bs;
|
||||
IntTy m_mask;
|
||||
};
|
||||
|
||||
// A STL-like iterator is required to be able to use range-based for loops.
|
||||
class Iterator
|
||||
{
|
||||
public:
|
||||
Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {}
|
||||
Iterator(IntTy val, int bit) : m_val(val), m_bit(bit) {}
|
||||
Iterator& operator=(Iterator other) { new (this) Iterator(other); return *this; }
|
||||
int operator*() { return m_bit; }
|
||||
Iterator& operator++()
|
||||
{
|
||||
if (m_val == 0)
|
||||
{
|
||||
m_bit = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
int bit = LeastSignificantSetBit(m_val);
|
||||
m_val &= ~(1 << bit);
|
||||
m_bit = bit;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
Iterator operator++(int _)
|
||||
{
|
||||
Iterator other(*this);
|
||||
++*this;
|
||||
return other;
|
||||
}
|
||||
bool operator==(Iterator other) const { return m_bit == other.m_bit; }
|
||||
bool operator!=(Iterator other) const { return m_bit != other.m_bit; }
|
||||
private:
|
||||
IntTy m_val;
|
||||
int m_bit;
|
||||
};
|
||||
|
||||
BitSet() : m_val(0) {}
|
||||
explicit BitSet(IntTy val) : m_val(val) {}
|
||||
BitSet(std::initializer_list<int> init)
|
||||
{
|
||||
m_val = 0;
|
||||
for (int bit : init)
|
||||
m_val |= (IntTy)1 << bit;
|
||||
}
|
||||
|
||||
static BitSet AllTrue(size_t count)
|
||||
{
|
||||
return BitSet(count == sizeof(IntTy)*8 ? ~(IntTy)0 : (((IntTy)1 << count) - 1));
|
||||
}
|
||||
|
||||
Ref operator[](size_t bit) { return Ref(this, (IntTy)1 << bit); }
|
||||
const Ref operator[](size_t bit) const { return (*const_cast<BitSet*>(this))[bit]; }
|
||||
bool operator==(BitSet other) const { return m_val == other.m_val; }
|
||||
bool operator!=(BitSet other) const { return m_val != other.m_val; }
|
||||
BitSet operator|(BitSet other) const { return BitSet(m_val | other.m_val); }
|
||||
BitSet operator&(BitSet other) const { return BitSet(m_val & other.m_val); }
|
||||
BitSet operator^(BitSet other) const { return BitSet(m_val ^ other.m_val); }
|
||||
BitSet operator~() const { return BitSet(~m_val); }
|
||||
BitSet& operator|=(BitSet other) { return *this = *this | other; }
|
||||
BitSet& operator&=(BitSet other) { return *this = *this & other; }
|
||||
BitSet& operator^=(BitSet other) { return *this = *this ^ other; }
|
||||
operator u32() = delete;
|
||||
operator bool() { return m_val != 0; }
|
||||
|
||||
// Warning: Even though on modern CPUs this is a single fast instruction,
|
||||
// Dolphin's official builds do not currently assume POPCNT support on x86,
|
||||
// so slower explicit bit twiddling is generated. Still should generally
|
||||
// be faster than a loop.
|
||||
unsigned int Count() const { return CountSetBits(m_val); }
|
||||
|
||||
Iterator begin() const { Iterator it(m_val, 0); return ++it; }
|
||||
Iterator end() const { return Iterator(m_val, -1); }
|
||||
|
||||
IntTy m_val;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
typedef BS::BitSet<u32> BitSet32;
|
||||
typedef BS::BitSet<u64> BitSet64;
|
||||
|
@ -198,10 +198,9 @@ void Arm64Jit::GenerateFixedCode(const JitOptions &jo) {
|
||||
|
||||
enterDispatcher = AlignCode16();
|
||||
|
||||
BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
|
||||
BitSet32 regs_to_save_fp(Arm64Gen::ALL_CALLEE_SAVED_FP);
|
||||
ABI_PushRegisters(regs_to_save);
|
||||
fp.ABI_PushRegisters(regs_to_save_fp);
|
||||
uint32_t regs_to_save = Arm64Gen::ALL_CALLEE_SAVED;
|
||||
uint32_t regs_to_save_fp = Arm64Gen::ALL_CALLEE_SAVED_FP;
|
||||
fp.ABI_PushRegisters(regs_to_save, regs_to_save_fp);
|
||||
|
||||
// Fixed registers, these are always kept when in Jit context.
|
||||
MOVP2R(MEMBASEREG, Memory::base);
|
||||
@ -290,8 +289,7 @@ void Arm64Jit::GenerateFixedCode(const JitOptions &jo) {
|
||||
SaveStaticRegisters();
|
||||
RestoreRoundingMode(true);
|
||||
|
||||
fp.ABI_PopRegisters(regs_to_save_fp);
|
||||
ABI_PopRegisters(regs_to_save);
|
||||
fp.ABI_PopRegisters(regs_to_save, regs_to_save_fp);
|
||||
|
||||
RET();
|
||||
|
||||
|
@ -175,10 +175,9 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
|
||||
|
||||
// if (skinning) log = true;
|
||||
|
||||
BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
|
||||
BitSet32 regs_to_save_fp(Arm64Gen::ALL_CALLEE_SAVED_FP);
|
||||
ABI_PushRegisters(regs_to_save);
|
||||
fp.ABI_PushRegisters(regs_to_save_fp);
|
||||
uint64_t regs_to_save = Arm64Gen::ALL_CALLEE_SAVED;
|
||||
uint64_t regs_to_save_fp = Arm64Gen::ALL_CALLEE_SAVED_FP;
|
||||
fp.ABI_PushRegisters(regs_to_save, regs_to_save_fp);
|
||||
|
||||
// Keep the scale/offset in a few fp registers if we need it.
|
||||
if (prescaleStep) {
|
||||
@ -279,8 +278,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
|
||||
STRH(INDEX_UNSIGNED, boundsMaxVReg, scratchReg64, offsetof(KnownVertexBounds, maxV));
|
||||
}
|
||||
|
||||
fp.ABI_PopRegisters(regs_to_save_fp);
|
||||
ABI_PopRegisters(regs_to_save);
|
||||
fp.ABI_PopRegisters(regs_to_save, regs_to_save_fp);
|
||||
|
||||
RET();
|
||||
|
||||
|
@ -39,10 +39,9 @@ void TestCode::Generate()
|
||||
|
||||
const u8 *start = AlignCode16();
|
||||
|
||||
BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
|
||||
BitSet32 regs_to_save_fp(Arm64Gen::ALL_CALLEE_SAVED_FP);
|
||||
ABI_PushRegisters(regs_to_save);
|
||||
fp.ABI_PushRegisters(regs_to_save_fp);
|
||||
uint32_t regs_to_save = Arm64Gen::ALL_CALLEE_SAVED;
|
||||
uint32_t regs_to_save_fp = Arm64Gen::ALL_CALLEE_SAVED_FP;
|
||||
fp.ABI_PushRegisters(regs_to_save, regs_to_save_fp);
|
||||
|
||||
PUSH(X3);
|
||||
POP(X3);
|
||||
@ -54,8 +53,7 @@ void TestCode::Generate()
|
||||
fp.SCVTF(S3, W12);
|
||||
MOVI2R(X0, 1337);
|
||||
|
||||
ABI_PopRegisters(regs_to_save);
|
||||
fp.ABI_PopRegisters(regs_to_save_fp);
|
||||
fp.ABI_PopRegisters(regs_to_save, regs_to_save_fp);
|
||||
|
||||
RET();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user