mirror of
https://github.com/libretro/ppsspp.git
synced 2025-02-15 14:40:20 +00:00
ARM64: fmla encoding, more disasm
This commit is contained in:
parent
2780eef595
commit
0849e270ee
@ -2052,7 +2052,7 @@ void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type,
|
||||
(size == 128 ? (1 << 23) : 0) | (opc << 22) | (encoded_imm << 10) | (Rn << 5) | Rt);
|
||||
}
|
||||
|
||||
void ARM64FloatEmitter::Emit2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
|
||||
void ARM64FloatEmitter::EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
|
||||
{
|
||||
_assert_msg_(DYNA_REC, !IsQuad(Rd), "%s only supports double and single registers!", __FUNCTION__);
|
||||
Rd = DecodeReg(Rd);
|
||||
@ -2683,55 +2683,55 @@ void ARM64FloatEmitter::FSQRT(ARM64Reg Rd, ARM64Reg Rn)
|
||||
// Scalar - 2 Source
|
||||
// Scalar FADD. Emits exactly one scalar two-source instruction with opcode 2;
// the type field is taken from whether Rd is a double register.
void ARM64FloatEmitter::FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitScalar2Source(0, 0, IsDouble(Rd), 2, Rd, Rn, Rm);
}
|
||||
// Scalar FMUL. Emits exactly one scalar two-source instruction with opcode 0;
// the type field is taken from whether Rd is a double register.
void ARM64FloatEmitter::FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitScalar2Source(0, 0, IsDouble(Rd), 0, Rd, Rn, Rm);
}
|
||||
// Scalar FSUB. Emits exactly one scalar two-source instruction with opcode 3;
// the type field is taken from whether Rd is a double register.
void ARM64FloatEmitter::FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitScalar2Source(0, 0, IsDouble(Rd), 3, Rd, Rn, Rm);
}
|
||||
// Scalar FDIV. Emits exactly one scalar two-source instruction with opcode 1;
// the type field is taken from whether Rd is a double register.
void ARM64FloatEmitter::FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitScalar2Source(0, 0, IsDouble(Rd), 1, Rd, Rn, Rm);
}
|
||||
// Scalar FMAX. Emits exactly one scalar two-source instruction with opcode 4;
// the type field is taken from whether Rd is a double register.
void ARM64FloatEmitter::FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitScalar2Source(0, 0, IsDouble(Rd), 4, Rd, Rn, Rm);
}
|
||||
// Scalar FMIN. Emits exactly one scalar two-source instruction with opcode 5;
// the type field is taken from whether Rd is a double register.
void ARM64FloatEmitter::FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitScalar2Source(0, 0, IsDouble(Rd), 5, Rd, Rn, Rm);
}
|
||||
// Scalar FMAXNM. Emits exactly one scalar two-source instruction with opcode 6;
// the type field is taken from whether Rd is a double register.
void ARM64FloatEmitter::FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitScalar2Source(0, 0, IsDouble(Rd), 6, Rd, Rn, Rm);
}
|
||||
// Scalar FMINNM. Emits exactly one scalar two-source instruction with opcode 7;
// the type field is taken from whether Rd is a double register.
void ARM64FloatEmitter::FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitScalar2Source(0, 0, IsDouble(Rd), 7, Rd, Rn, Rm);
}
|
||||
// Scalar FNMUL. Emits exactly one scalar two-source instruction with opcode 8;
// the type field is taken from whether Rd is a double register.
void ARM64FloatEmitter::FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitScalar2Source(0, 0, IsDouble(Rd), 8, Rd, Rn, Rm);
}
|
||||
|
||||
// Scalar FMADD. Emits exactly one scalar three-source instruction with opcode 0;
// single vs. double precision is picked from Rd.
void ARM64FloatEmitter::FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
	EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 0);
}
|
||||
// Scalar FMSUB. Emits exactly one scalar three-source instruction with opcode 1;
// single vs. double precision is picked from Rd.
void ARM64FloatEmitter::FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
	EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 1);
}
|
||||
// Scalar FNMADD. Emits exactly one scalar three-source instruction with opcode 2;
// single vs. double precision is picked from Rd.
void ARM64FloatEmitter::FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
	EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 2);
}
|
||||
// Scalar FNMSUB. Emits exactly one scalar three-source instruction with opcode 3;
// single vs. double precision is picked from Rd.
void ARM64FloatEmitter::FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
	EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 3);
}
|
||||
|
||||
void ARM64FloatEmitter::Emit3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode) {
|
||||
void ARM64FloatEmitter::EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode) {
|
||||
int type = isDouble ? 1 : 0;
|
||||
Rd = DecodeReg(Rd);
|
||||
Rn = DecodeReg(Rn);
|
||||
@ -3235,20 +3235,32 @@ void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8
|
||||
|
||||
bool L = false;
|
||||
bool H = false;
|
||||
|
||||
if (size == 32)
|
||||
{
|
||||
if (size == 32) {
|
||||
L = index & 1;
|
||||
H = (index >> 1) & 1;
|
||||
}
|
||||
else if (size == 64)
|
||||
{
|
||||
} else if (size == 64) {
|
||||
H = index == 1;
|
||||
}
|
||||
|
||||
EmitVectorxElement(0, 2 | (size >> 6), L, 0x9, H, Rd, Rn, Rm);
|
||||
}
|
||||
|
||||
// FMLA, vector x indexed element form.
//   size  - must be 32 or 64 (asserted below).
//   index - element selector for Rm; it is split into the L and H bits
//           that EmitVectorxElement takes. For size 32 the low bit goes to L
//           and the next bit to H; for size 64 only H is used, set when index is 1.
// The instruction is emitted with opcode 1 and size field 2 | (size >> 6).
void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index)
{
	_assert_msg_(DYNA_REC, size == 32 || size == 64, "%s only supports 32bit or 64bit size!", __FUNCTION__);

	bool L = false;
	bool H = false;
	switch (size) {
	case 32:
		L = (index & 1) != 0;
		H = (index & 2) != 0;
		break;
	case 64:
		H = (index == 1);
		break;
	default:
		// Any other size leaves both bits clear, as before.
		break;
	}

	const u32 sizeField = 2 | (size >> 6);
	EmitVectorxElement(0, sizeField, L, 1, H, Rd, Rn, Rm);
}
|
||||
|
||||
void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
|
||||
{
|
||||
for (auto it : registers)
|
||||
@ -3271,10 +3283,12 @@ void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mas
|
||||
|
||||
void ARM64XEmitter::ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
|
||||
unsigned int n, imm_s, imm_r;
|
||||
if (!Is64Bit(Rn))
|
||||
imm &= 0xFFFFFFFF;
|
||||
if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
|
||||
AND(Rd, Rn, imm_r, imm_s, n);
|
||||
} else {
|
||||
_assert_msg_(JIT, scratch != INVALID_REG, "ANDSI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
|
||||
_assert_msg_(JIT, scratch != INVALID_REG, "ANDSI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
|
||||
MOVI2R(scratch, imm);
|
||||
AND(Rd, Rn, scratch);
|
||||
}
|
||||
@ -3285,7 +3299,7 @@ void ARM64XEmitter::ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
|
||||
if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
|
||||
ORR(Rd, Rn, imm_r, imm_s, n);
|
||||
} else {
|
||||
_assert_msg_(JIT, scratch != INVALID_REG, "ORRI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
|
||||
_assert_msg_(JIT, scratch != INVALID_REG, "ORRI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
|
||||
MOVI2R(scratch, imm);
|
||||
ORR(Rd, Rn, scratch);
|
||||
}
|
||||
@ -3296,7 +3310,7 @@ void ARM64XEmitter::EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
|
||||
if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
|
||||
EOR(Rd, Rn, imm_r, imm_s, n);
|
||||
} else {
|
||||
_assert_msg_(JIT, scratch != INVALID_REG, "EORI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
|
||||
_assert_msg_(JIT, scratch != INVALID_REG, "EORI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
|
||||
MOVI2R(scratch, imm);
|
||||
EOR(Rd, Rn, scratch);
|
||||
}
|
||||
@ -3307,7 +3321,7 @@ void ARM64XEmitter::ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
|
||||
if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
|
||||
ANDS(Rd, Rn, imm_r, imm_s, n);
|
||||
} else {
|
||||
_assert_msg_(JIT, scratch != INVALID_REG, "ANDSI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
|
||||
_assert_msg_(JIT, scratch != INVALID_REG, "ANDSI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
|
||||
MOVI2R(scratch, imm);
|
||||
ANDS(Rd, Rn, scratch);
|
||||
}
|
||||
@ -3319,7 +3333,7 @@ void ARM64XEmitter::ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
|
||||
if (IsImmArithmetic(imm, &val, &shift)) {
|
||||
ADD(Rd, Rn, val, shift);
|
||||
} else {
|
||||
_assert_msg_(JIT, scratch != INVALID_REG, "ADDI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
|
||||
_assert_msg_(JIT, scratch != INVALID_REG, "ADDI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
|
||||
MOVI2R(scratch, imm);
|
||||
ADD(Rd, Rn, scratch);
|
||||
}
|
||||
@ -3331,7 +3345,7 @@ void ARM64XEmitter::SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
|
||||
if (IsImmArithmetic(imm, &val, &shift)) {
|
||||
SUB(Rd, Rn, val, shift);
|
||||
} else {
|
||||
_assert_msg_(JIT, scratch != INVALID_REG, "SUBI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
|
||||
_assert_msg_(JIT, scratch != INVALID_REG, "SUBI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
|
||||
MOVI2R(scratch, imm);
|
||||
SUB(Rd, Rn, scratch);
|
||||
}
|
||||
@ -3343,7 +3357,7 @@ void ARM64XEmitter::CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
|
||||
if (IsImmArithmetic(imm, &val, &shift)) {
|
||||
CMP(Rn, val, shift);
|
||||
} else {
|
||||
_assert_msg_(JIT, scratch != INVALID_REG, "CMPI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
|
||||
_assert_msg_(JIT, scratch != INVALID_REG, "CMPI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
|
||||
MOVI2R(scratch, imm);
|
||||
CMP(Rn, scratch);
|
||||
}
|
||||
|
@ -872,6 +872,7 @@ public:
|
||||
|
||||
// vector x indexed element
|
||||
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
|
||||
void FMLA(u8 esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
|
||||
|
||||
void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);
|
||||
|
||||
@ -885,7 +886,7 @@ private:
|
||||
|
||||
// Emitting functions
|
||||
void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
|
||||
void Emit2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void Emit2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
||||
@ -904,7 +905,7 @@ private:
|
||||
void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
|
||||
void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign);
|
||||
void Emit3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
|
||||
void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
|
||||
};
|
||||
|
||||
class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>
|
||||
|
@ -405,8 +405,19 @@ static void DataProcessingRegister(uint32_t w, uint64_t addr, Instruction *instr
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true when bit 30 of the instruction word is set.
inline bool GetQ(uint32_t w) {
	const uint32_t mask = 0x40000000u;
	return (w & mask) != 0;
}
|
||||
// Returns true when bit 29 of the instruction word is set.
inline bool GetU(uint32_t w) {
	const uint32_t mask = 0x20000000u;
	return (w & mask) != 0;
}
|
||||
// Maps the (Q, sz) pair to the arrangement suffix used in disassembly text:
//   Q=0, sz=0 -> "2s"
//   Q=1, sz=0 -> "4s"
//   Q=1, sz=1 -> "2d"
// The remaining combination (Q=0, sz=1) yields the literal "ERROR".
const char *GetArrangement(bool Q, bool sz) {
	if (!sz) {
		return Q ? "4s" : "2s";
	}
	return Q ? "2d" : "ERROR";
}
|
||||
// (w >> 25) & 0xF == 7
|
||||
static void FPandASIMD1(uint32_t w, uint64_t addr, Instruction *instr) {
|
||||
int Rd = w & 0x1f;
|
||||
int Rn = (w >> 5) & 0x1f;
|
||||
int Rm = (w >> 16) & 0x1f;
|
||||
if (((w >> 21) & 0x4F9) == 0x71) {
|
||||
switch ((w >> 10) & 3) {
|
||||
case 1: case 3:
|
||||
@ -430,13 +441,38 @@ static void FPandASIMD1(uint32_t w, uint64_t addr, Instruction *instr) {
|
||||
}
|
||||
} else if (((w >> 21) & 0x4F8) == 0x78) {
|
||||
if ((w >> 10) & 1) {
|
||||
if (((w >> 19) & 0xf) == 0) {
|
||||
if (((w >> 19) & 0xf) == 0) {
|
||||
snprintf(instr->text, sizeof(instr->text), "(asimd modified immediate %08x)", w);
|
||||
} else {
|
||||
snprintf(instr->text, sizeof(instr->text), "(asimd shift-by-immediate %08x)", w);
|
||||
}
|
||||
} else {
|
||||
snprintf(instr->text, sizeof(instr->text), "(asimd vector x indexed elem %08x)", w);
|
||||
bool Q = GetQ(w);
|
||||
bool U = GetU(w);
|
||||
int size = (w >> 22) & 3;
|
||||
bool L = (w >> 21) & 1;
|
||||
bool M = (w >> 20) & 1;
|
||||
bool H = (w >> 11) & 1;
|
||||
int opcode = (w >> 12) & 0xf;
|
||||
if (size & 0x2) {
|
||||
const char *opname = 0;
|
||||
switch (opcode) {
|
||||
case 1: opname = "fmla"; break;
|
||||
case 5: opname = "fmls"; break;
|
||||
case 9: opname = "fmul"; break;
|
||||
}
|
||||
int index;
|
||||
if ((size & 1) == 0) {
|
||||
index = (H << 1) | L;
|
||||
} else {
|
||||
index = H;
|
||||
}
|
||||
char r = Q ? 'q' : 'd';
|
||||
const char *arrangement = GetArrangement(Q, size & 1);
|
||||
snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, %c%d.%s[%d]", opname, r, Rd, r, Rn, r, Rm, arrangement, index);
|
||||
} else {
|
||||
snprintf(instr->text, sizeof(instr->text), "(asimd vector x indexed elem %08x)", w);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bail:
|
||||
@ -516,10 +552,12 @@ static void FPandASIMD2(uint32_t w, uint64_t addr, Instruction *instr) {
|
||||
} else if (((w >> 10) & 3) == 2) {
|
||||
int opc = (w >> 12) & 0xf;
|
||||
const char *opnames[9] = { "fmul", "fdiv", "fadd", "fsub", "fmax", "fmin", "fmaxnm", "fminnm", "fnmul" };
|
||||
char r = 's'; // TODO: Support doubles too
|
||||
char r = ((w >> 22) & 1) ? 'd' : 's';
|
||||
snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, %c%d", opnames[opc], r, Rd, r, Rn, r, Rm);
|
||||
} else if (((w >> 10) & 3) == 3) {
|
||||
snprintf(instr->text, sizeof(instr->text), "(float cond select %08x)", w);
|
||||
char fr = ((w >> 22) & 1) ? 'd' : 's';
|
||||
int cond = (w >> 12) & 0xf;
|
||||
snprintf(instr->text, sizeof(instr->text), "fcsel %c%d, %c%d, %c%d, %s", fr, Rd, fr, Rn, fr, Rm, condnames[cond]);
|
||||
}
|
||||
} else if (((w >> 21) & 0x2F8) == 0xF8) {
|
||||
int opcode = ((w >> 15) & 1) | ((w >> 20) & 2);
|
||||
|
@ -39,6 +39,12 @@ bool TestArm64Emitter() {
|
||||
|
||||
//emitter.EXTR(W1, W3, 0, 7);
|
||||
//RET(CheckLast(emitter, "53033061 extr w1, w3, w7"));
|
||||
fp.FMUL(32, Q0, Q1, Q2, 3);
|
||||
RET(CheckLast(emitter, "4fa29820 fmul q0, q1, q2.4s[3]")); // A real disasm says fmla v0.2s, v1.2s, v2.s[1] but I think our way is more readable
|
||||
fp.FMLA(32, D0, D1, D2, 1);
|
||||
RET(CheckLast(emitter, "1e222c20 fmla d0, d1, d2.2s[1]"));
|
||||
fp.FCSEL(S0, S1, S2, CC_CS);
|
||||
RET(CheckLast(emitter, "1e222c20 fcsel s0, s1, s2, cs"));
|
||||
float value = 1.0;
|
||||
uint8_t imm8;
|
||||
FPImm8FromFloat(value, &imm8);
|
||||
@ -46,7 +52,7 @@ bool TestArm64Emitter() {
|
||||
RET(CheckLast(emitter, "1e2e1007 fmov s7, #1.000000"));
|
||||
FPImm8FromFloat(-value, &imm8);
|
||||
fp.FMOV(S7, imm8);
|
||||
RET(CheckLast(emitter, "1e2e1007 fmov s7, #-1.000000"));
|
||||
RET(CheckLast(emitter, "0fa21020 fmov s7, #-1.000000"));
|
||||
fp.FMADD(S1, S2, S3, S4);
|
||||
RET(CheckLast(emitter, "1f031041 fmadd s1, s2, s3, s4"));
|
||||
fp.FNMSUB(D1, D2, D3, D4);
|
||||
@ -151,7 +157,7 @@ bool TestArm64Emitter() {
|
||||
RET(CheckLast(emitter, "1e2020e8 fcmp s7, #0.0"));
|
||||
fp.FCMP(D7, D3);
|
||||
RET(CheckLast(emitter, "1e6320e0 fcmp d7, d3"));
|
||||
emitter.ORI2R(X1, X3, 0x3F, INVALID_REG);
|
||||
emitter.ORRI2R(X1, X3, 0x3F, INVALID_REG);
|
||||
RET(CheckLast(emitter, "b2401461 orr x1, x3, #0x3f"));
|
||||
emitter.EORI2R(X1, X3, 0x3F0000003F0, INVALID_REG);
|
||||
RET(CheckLast(emitter, "d21c1461 eor x1, x3, #0x3f0000003f0"));
|
||||
|
Loading…
x
Reference in New Issue
Block a user