ARM64: fmla encoding, more disasm

This commit is contained in:
Henrik Rydgard 2015-03-22 11:05:36 +01:00
parent 2780eef595
commit 0849e270ee
4 changed files with 95 additions and 36 deletions

View File

@ -2052,7 +2052,7 @@ void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type,
(size == 128 ? (1 << 23) : 0) | (opc << 22) | (encoded_imm << 10) | (Rn << 5) | Rt);
}
void ARM64FloatEmitter::Emit2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
void ARM64FloatEmitter::EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
_assert_msg_(DYNA_REC, !IsQuad(Rd), "%s only supports double and single registers!", __FUNCTION__);
Rd = DecodeReg(Rd);
@ -2683,55 +2683,55 @@ void ARM64FloatEmitter::FSQRT(ARM64Reg Rd, ARM64Reg Rn)
// Scalar - 2 Source
// FADD (scalar, 2 source): forwards Rd/Rn/Rm to the scalar two-source emitter with
// M = 0, S = 0, type = IsDouble(Rd) and opcode 2 (listed as "fadd" in the
// disassembler's scalar 2-source name table).
// NOTE(review): the two call lines below look like the pre-rename (Emit2Source) and
// post-rename (EmitScalar2Source) forms of one statement shown together by the diff
// view - presumably only the second exists in the real file; confirm.
void ARM64FloatEmitter::FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
Emit2Source(0, 0, IsDouble(Rd), 2, Rd, Rn, Rm);
EmitScalar2Source(0, 0, IsDouble(Rd), 2, Rd, Rn, Rm);
}
// FMUL (scalar, 2 source): forwards Rd/Rn/Rm to the scalar two-source emitter with
// M = 0, S = 0, type = IsDouble(Rd) and opcode 0 (listed as "fmul" in the
// disassembler's scalar 2-source name table).
// NOTE(review): the two call lines below look like the pre-rename (Emit2Source) and
// post-rename (EmitScalar2Source) forms of one statement shown together by the diff
// view - presumably only the second exists in the real file; confirm.
void ARM64FloatEmitter::FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
Emit2Source(0, 0, IsDouble(Rd), 0, Rd, Rn, Rm);
EmitScalar2Source(0, 0, IsDouble(Rd), 0, Rd, Rn, Rm);
}
// FSUB (scalar, 2 source): forwards Rd/Rn/Rm to the scalar two-source emitter with
// M = 0, S = 0, type = IsDouble(Rd) and opcode 3 (listed as "fsub" in the
// disassembler's scalar 2-source name table).
// NOTE(review): the two call lines below look like the pre-rename (Emit2Source) and
// post-rename (EmitScalar2Source) forms of one statement shown together by the diff
// view - presumably only the second exists in the real file; confirm.
void ARM64FloatEmitter::FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
Emit2Source(0, 0, IsDouble(Rd), 3, Rd, Rn, Rm);
EmitScalar2Source(0, 0, IsDouble(Rd), 3, Rd, Rn, Rm);
}
// FDIV (scalar, 2 source): forwards Rd/Rn/Rm to the scalar two-source emitter with
// M = 0, S = 0, type = IsDouble(Rd) and opcode 1 (listed as "fdiv" in the
// disassembler's scalar 2-source name table).
// NOTE(review): the two call lines below look like the pre-rename (Emit2Source) and
// post-rename (EmitScalar2Source) forms of one statement shown together by the diff
// view - presumably only the second exists in the real file; confirm.
void ARM64FloatEmitter::FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
Emit2Source(0, 0, IsDouble(Rd), 1, Rd, Rn, Rm);
EmitScalar2Source(0, 0, IsDouble(Rd), 1, Rd, Rn, Rm);
}
// FMAX (scalar, 2 source): forwards Rd/Rn/Rm to the scalar two-source emitter with
// M = 0, S = 0, type = IsDouble(Rd) and opcode 4 (listed as "fmax" in the
// disassembler's scalar 2-source name table).
// NOTE(review): the two call lines below look like the pre-rename (Emit2Source) and
// post-rename (EmitScalar2Source) forms of one statement shown together by the diff
// view - presumably only the second exists in the real file; confirm.
void ARM64FloatEmitter::FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
Emit2Source(0, 0, IsDouble(Rd), 4, Rd, Rn, Rm);
EmitScalar2Source(0, 0, IsDouble(Rd), 4, Rd, Rn, Rm);
}
// FMIN (scalar, 2 source): forwards Rd/Rn/Rm to the scalar two-source emitter with
// M = 0, S = 0, type = IsDouble(Rd) and opcode 5 (listed as "fmin" in the
// disassembler's scalar 2-source name table).
// NOTE(review): the two call lines below look like the pre-rename (Emit2Source) and
// post-rename (EmitScalar2Source) forms of one statement shown together by the diff
// view - presumably only the second exists in the real file; confirm.
void ARM64FloatEmitter::FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
Emit2Source(0, 0, IsDouble(Rd), 5, Rd, Rn, Rm);
EmitScalar2Source(0, 0, IsDouble(Rd), 5, Rd, Rn, Rm);
}
// FMAXNM (scalar, 2 source): forwards Rd/Rn/Rm to the scalar two-source emitter with
// M = 0, S = 0, type = IsDouble(Rd) and opcode 6 (listed as "fmaxnm" in the
// disassembler's scalar 2-source name table).
// NOTE(review): the two call lines below look like the pre-rename (Emit2Source) and
// post-rename (EmitScalar2Source) forms of one statement shown together by the diff
// view - presumably only the second exists in the real file; confirm.
void ARM64FloatEmitter::FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
Emit2Source(0, 0, IsDouble(Rd), 6, Rd, Rn, Rm);
EmitScalar2Source(0, 0, IsDouble(Rd), 6, Rd, Rn, Rm);
}
// FMINNM (scalar, 2 source): forwards Rd/Rn/Rm to the scalar two-source emitter with
// M = 0, S = 0, type = IsDouble(Rd) and opcode 7 (listed as "fminnm" in the
// disassembler's scalar 2-source name table).
// NOTE(review): the two call lines below look like the pre-rename (Emit2Source) and
// post-rename (EmitScalar2Source) forms of one statement shown together by the diff
// view - presumably only the second exists in the real file; confirm.
void ARM64FloatEmitter::FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
Emit2Source(0, 0, IsDouble(Rd), 7, Rd, Rn, Rm);
EmitScalar2Source(0, 0, IsDouble(Rd), 7, Rd, Rn, Rm);
}
// FNMUL (scalar, 2 source): forwards Rd/Rn/Rm to the scalar two-source emitter with
// M = 0, S = 0, type = IsDouble(Rd) and opcode 8 (listed as "fnmul" in the
// disassembler's scalar 2-source name table).
// NOTE(review): the two call lines below look like the pre-rename (Emit2Source) and
// post-rename (EmitScalar2Source) forms of one statement shown together by the diff
// view - presumably only the second exists in the real file; confirm.
void ARM64FloatEmitter::FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
Emit2Source(0, 0, IsDouble(Rd), 8, Rd, Rn, Rm);
EmitScalar2Source(0, 0, IsDouble(Rd), 8, Rd, Rn, Rm);
}
// FMADD (scalar, 3 source): forwards Rd/Rn/Rm/Ra to the scalar three-source emitter
// with isDouble = IsDouble(Rd) and opcode 0.
// NOTE(review): the two call lines below look like the pre-rename (Emit3Source) and
// post-rename (EmitScalar3Source) forms of one statement shown together by the diff
// view - presumably only the second exists in the real file; confirm.
void ARM64FloatEmitter::FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
Emit3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 0);
EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 0);
}
// FMSUB (scalar, 3 source): forwards Rd/Rn/Rm/Ra to the scalar three-source emitter
// with isDouble = IsDouble(Rd) and opcode 1.
// NOTE(review): the two call lines below look like the pre-rename (Emit3Source) and
// post-rename (EmitScalar3Source) forms of one statement shown together by the diff
// view - presumably only the second exists in the real file; confirm.
void ARM64FloatEmitter::FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
Emit3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 1);
EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 1);
}
// FNMADD (scalar, 3 source): forwards Rd/Rn/Rm/Ra to the scalar three-source emitter
// with isDouble = IsDouble(Rd) and opcode 2.
// NOTE(review): the two call lines below look like the pre-rename (Emit3Source) and
// post-rename (EmitScalar3Source) forms of one statement shown together by the diff
// view - presumably only the second exists in the real file; confirm.
void ARM64FloatEmitter::FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
Emit3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 2);
EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 2);
}
// FNMSUB (scalar, 3 source): forwards Rd/Rn/Rm/Ra to the scalar three-source emitter
// with isDouble = IsDouble(Rd) and opcode 3.
// NOTE(review): the two call lines below look like the pre-rename (Emit3Source) and
// post-rename (EmitScalar3Source) forms of one statement shown together by the diff
// view - presumably only the second exists in the real file; confirm.
void ARM64FloatEmitter::FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
Emit3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 3);
EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 3);
}
void ARM64FloatEmitter::Emit3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode) {
void ARM64FloatEmitter::EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode) {
int type = isDouble ? 1 : 0;
Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn);
@ -3235,20 +3235,32 @@ void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8
bool L = false;
bool H = false;
if (size == 32)
{
if (size == 32) {
L = index & 1;
H = (index >> 1) & 1;
}
else if (size == 64)
{
} else if (size == 64) {
H = index == 1;
}
EmitVectorxElement(0, 2 | (size >> 6), L, 0x9, H, Rd, Rn, Rm);
}
// FMLA, "vector x indexed element" form.
//
// size   Element width in bits; must be 32 or 64.
// Rd/Rn/Rm  Registers passed through unchanged to EmitVectorxElement.
// index  Lane of Rm to use. For 32-bit elements it is split into L (bit 0) and
//        H (bit 1), so valid values are 0..3. For 64-bit elements only H carries
//        the index (L stays 0), so valid values are 0..1.
//
// The encoding is emitted via EmitVectorxElement with U = 0, a size field of
// 2 | (size >> 6) (2 for 32-bit, 3 for 64-bit elements) and opcode 1.
void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index)
{
	_assert_msg_(DYNA_REC, size == 32 || size == 64, "%s only supports 32bit or 64bit size!", __FUNCTION__);
	// Without this check an out-of-range lane is silently mis-encoded: the 32-bit
	// path drops the upper index bits and the 64-bit path turns any index >= 2 into lane 0.
	_assert_msg_(DYNA_REC, index < (size == 32 ? 4 : 2), "%s index out of range for element size!", __FUNCTION__);
	bool L = false;
	bool H = false;
	if (size == 32) {
		L = index & 1;
		H = (index >> 1) & 1;
	} else if (size == 64) {
		H = index == 1;
	}
	EmitVectorxElement(0, 2 | (size >> 6), L, 1, H, Rd, Rn, Rm);
}
void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
{
for (auto it : registers)
@ -3271,10 +3283,12 @@ void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mas
void ARM64XEmitter::ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
unsigned int n, imm_s, imm_r;
if (!Is64Bit(Rn))
imm &= 0xFFFFFFFF;
if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
AND(Rd, Rn, imm_r, imm_s, n);
} else {
_assert_msg_(JIT, scratch != INVALID_REG, "ANDSI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
_assert_msg_(JIT, scratch != INVALID_REG, "ANDSI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
MOVI2R(scratch, imm);
AND(Rd, Rn, scratch);
}
@ -3285,7 +3299,7 @@ void ARM64XEmitter::ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
ORR(Rd, Rn, imm_r, imm_s, n);
} else {
_assert_msg_(JIT, scratch != INVALID_REG, "ORRI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
_assert_msg_(JIT, scratch != INVALID_REG, "ORRI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
MOVI2R(scratch, imm);
ORR(Rd, Rn, scratch);
}
@ -3296,7 +3310,7 @@ void ARM64XEmitter::EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
EOR(Rd, Rn, imm_r, imm_s, n);
} else {
_assert_msg_(JIT, scratch != INVALID_REG, "EORI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
_assert_msg_(JIT, scratch != INVALID_REG, "EORI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
MOVI2R(scratch, imm);
EOR(Rd, Rn, scratch);
}
@ -3307,7 +3321,7 @@ void ARM64XEmitter::ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
ANDS(Rd, Rn, imm_r, imm_s, n);
} else {
_assert_msg_(JIT, scratch != INVALID_REG, "ANDSI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
_assert_msg_(JIT, scratch != INVALID_REG, "ANDSI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
MOVI2R(scratch, imm);
ANDS(Rd, Rn, scratch);
}
@ -3319,7 +3333,7 @@ void ARM64XEmitter::ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
if (IsImmArithmetic(imm, &val, &shift)) {
ADD(Rd, Rn, val, shift);
} else {
_assert_msg_(JIT, scratch != INVALID_REG, "ADDI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
_assert_msg_(JIT, scratch != INVALID_REG, "ADDI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
MOVI2R(scratch, imm);
ADD(Rd, Rn, scratch);
}
@ -3331,7 +3345,7 @@ void ARM64XEmitter::SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
if (IsImmArithmetic(imm, &val, &shift)) {
SUB(Rd, Rn, val, shift);
} else {
_assert_msg_(JIT, scratch != INVALID_REG, "SUBI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
_assert_msg_(JIT, scratch != INVALID_REG, "SUBI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
MOVI2R(scratch, imm);
SUB(Rd, Rn, scratch);
}
@ -3343,7 +3357,7 @@ void ARM64XEmitter::CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
if (IsImmArithmetic(imm, &val, &shift)) {
CMP(Rn, val, shift);
} else {
_assert_msg_(JIT, scratch != INVALID_REG, "CMPI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
_assert_msg_(JIT, scratch != INVALID_REG, "CMPI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
MOVI2R(scratch, imm);
CMP(Rn, scratch);
}

View File

@ -872,6 +872,7 @@ public:
// vector x indexed element
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
void FMLA(u8 esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);
@ -885,7 +886,7 @@ private:
// Emitting functions
void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void Emit2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
void Emit2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
@ -904,7 +905,7 @@ private:
void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign);
void Emit3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
};
class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>

View File

@ -405,8 +405,19 @@ static void DataProcessingRegister(uint32_t w, uint64_t addr, Instruction *instr
}
}
// Returns true when bit 30 of the instruction word is set.
inline bool GetQ(uint32_t w) {
	const uint32_t kQMask = UINT32_C(1) << 30;
	return (w & kQMask) != 0;
}
// Returns true when bit 29 of the instruction word is set.
inline bool GetU(uint32_t w) {
	const uint32_t kUMask = UINT32_C(1) << 29;
	return (w & kUMask) != 0;
}
// Maps the (Q, sz) bit pair to the arrangement suffix used in disassembly text:
//   Q=0, sz=0 -> "2s"
//   Q=1, sz=0 -> "4s"
//   Q=1, sz=1 -> "2d"
// The remaining combination (Q=0, sz=1) has no name here and yields "ERROR".
const char *GetArrangement(bool Q, bool sz) {
	if (!sz) {
		return Q ? "4s" : "2s";
	}
	return Q ? "2d" : "ERROR";
}
// (w >> 25) & 0xF == 7
static void FPandASIMD1(uint32_t w, uint64_t addr, Instruction *instr) {
int Rd = w & 0x1f;
int Rn = (w >> 5) & 0x1f;
int Rm = (w >> 16) & 0x1f;
if (((w >> 21) & 0x4F9) == 0x71) {
switch ((w >> 10) & 3) {
case 1: case 3:
@ -430,13 +441,38 @@ static void FPandASIMD1(uint32_t w, uint64_t addr, Instruction *instr) {
}
} else if (((w >> 21) & 0x4F8) == 0x78) {
if ((w >> 10) & 1) {
if (((w >> 19) & 0xf) == 0) {
if (((w >> 19) & 0xf) == 0) {
snprintf(instr->text, sizeof(instr->text), "(asimd modified immediate %08x)", w);
} else {
snprintf(instr->text, sizeof(instr->text), "(asimd shift-by-immediate %08x)", w);
}
} else {
snprintf(instr->text, sizeof(instr->text), "(asimd vector x indexed elem %08x)", w);
bool Q = GetQ(w);
bool U = GetU(w);
int size = (w >> 22) & 3;
bool L = (w >> 21) & 1;
bool M = (w >> 20) & 1;
bool H = (w >> 11) & 1;
int opcode = (w >> 12) & 0xf;
if (size & 0x2) {
const char *opname = 0;
switch (opcode) {
case 1: opname = "fmla"; break;
case 5: opname = "fmls"; break;
case 9: opname = "fmul"; break;
}
int index;
if ((size & 1) == 0) {
index = (H << 1) | L;
} else {
index = H;
}
char r = Q ? 'q' : 'd';
const char *arrangement = GetArrangement(Q, size & 1);
snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, %c%d.%s[%d]", opname, r, Rd, r, Rn, r, Rm, arrangement, index);
} else {
snprintf(instr->text, sizeof(instr->text), "(asimd vector x indexed elem %08x)", w);
}
}
} else {
bail:
@ -516,10 +552,12 @@ static void FPandASIMD2(uint32_t w, uint64_t addr, Instruction *instr) {
} else if (((w >> 10) & 3) == 2) {
int opc = (w >> 12) & 0xf;
const char *opnames[9] = { "fmul", "fdiv", "fadd", "fsub", "fmax", "fmin", "fmaxnm", "fminnm", "fnmul" };
char r = 's'; // TODO: Support doubles too
char r = ((w >> 22) & 1) ? 'd' : 's';
snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, %c%d", opnames[opc], r, Rd, r, Rn, r, Rm);
} else if (((w >> 10) & 3) == 3) {
snprintf(instr->text, sizeof(instr->text), "(float cond select %08x)", w);
char fr = ((w >> 22) & 1) ? 'd' : 's';
int cond = (w >> 12) & 0xf;
snprintf(instr->text, sizeof(instr->text), "fcsel %c%d, %c%d, %c%d, %s", fr, Rd, fr, Rn, fr, Rm, condnames[cond]);
}
} else if (((w >> 21) & 0x2F8) == 0xF8) {
int opcode = ((w >> 15) & 1) | ((w >> 20) & 2);

View File

@ -39,6 +39,12 @@ bool TestArm64Emitter() {
//emitter.EXTR(W1, W3, 0, 7);
//RET(CheckLast(emitter, "53033061 extr w1, w3, w7"));
fp.FMUL(32, Q0, Q1, Q2, 3);
RET(CheckLast(emitter, "4fa29820 fmul q0, q1, q2.4s[3]")); // A real disasm says fmla v0.2s, v1.2s, v2.s[1] but I think our way is more readable
fp.FMLA(32, D0, D1, D2, 1);
RET(CheckLast(emitter, "1e222c20 fmla d0, d1, d2.2s[1]"));
fp.FCSEL(S0, S1, S2, CC_CS);
RET(CheckLast(emitter, "1e222c20 fcsel s0, s1, s2, cs"));
float value = 1.0;
uint8_t imm8;
FPImm8FromFloat(value, &imm8);
@ -46,7 +52,7 @@ bool TestArm64Emitter() {
RET(CheckLast(emitter, "1e2e1007 fmov s7, #1.000000"));
FPImm8FromFloat(-value, &imm8);
fp.FMOV(S7, imm8);
RET(CheckLast(emitter, "1e2e1007 fmov s7, #-1.000000"));
RET(CheckLast(emitter, "0fa21020 fmov s7, #-1.000000"));
fp.FMADD(S1, S2, S3, S4);
RET(CheckLast(emitter, "1f031041 fmadd s1, s2, s3, s4"));
fp.FNMSUB(D1, D2, D3, D4);
@ -151,7 +157,7 @@ bool TestArm64Emitter() {
RET(CheckLast(emitter, "1e2020e8 fcmp s7, #0.0"));
fp.FCMP(D7, D3);
RET(CheckLast(emitter, "1e6320e0 fcmp d7, d3"));
emitter.ORI2R(X1, X3, 0x3F, INVALID_REG);
emitter.ORRI2R(X1, X3, 0x3F, INVALID_REG);
RET(CheckLast(emitter, "b2401461 orr x1, x3, #0x3f"));
emitter.EORI2R(X1, X3, 0x3F0000003F0, INVALID_REG);
RET(CheckLast(emitter, "d21c1461 eor x1, x3, #0x3f0000003f0"));