mirror of https://github.com/hrydgard/ppsspp.git
synced 2025-02-13 10:24:39 +00:00

ARM64: Fix FCVTL, use it in v2hf

This commit is contained in:
parent 8eedcc7fb0
commit acf08eefa8
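In brief: Emit2RegMisc now takes the Q (128-bit) flag from its caller instead of deriving it internally, FCVTL gains a source_upper flag that selects the FCVTL2 form, and the ARM64 JIT gains Comp_Vi2f and Comp_Vh2f implementations that use it. The two changed emitter signatures, copied from the diff below for quick reference:

void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn, bool source_upper = false);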
@@ -2084,14 +2084,13 @@ void ARM64FloatEmitter::EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd
 		(1 << 10) | (Rn << 5) | Rd);
 }
 
-void ARM64FloatEmitter::Emit2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
+void ARM64FloatEmitter::Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
 {
 	_assert_msg_(DYNA_REC, !IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);
-	bool quad = IsQuad(Rd);
 	Rd = DecodeReg(Rd);
 	Rn = DecodeReg(Rn);
 
-	Write32((quad << 30) | (U << 29) | (0x71 << 21) | (size << 22) | \
+	Write32((Q << 30) | (U << 29) | (0x71 << 21) | (size << 22) | \
 		(opcode << 12) | (1 << 11) | (Rn << 5) | Rd);
 }
 
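For orientation, a minimal standalone sketch of the word this helper now writes, mirroring the Write32 expression above (the function name is illustrative and not part of the emitter): the Q flag lands in bit 30, which is what selects the 128-bit vector form.

#include <cstdint>

// Same packing as the emitter's Write32 line: AdvSIMD two-register-misc word.
// Q = 1 selects the 128-bit (quad) form, Q = 0 the 64-bit form.
static uint32_t TwoRegMiscWord(bool Q, bool U, uint32_t size, uint32_t opcode,
                               uint32_t Rn, uint32_t Rd) {
	return ((uint32_t)Q << 30) | ((uint32_t)U << 29) | (0x71u << 21) |
	       (size << 22) | (opcode << 12) | (1u << 11) | (Rn << 5) | Rd;
}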
@@ -2786,27 +2785,27 @@ void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
 }
 void ARM64FloatEmitter::FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(0, 2 | (size >> 6), 0xF, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xF, Rd, Rn);
 }
 void ARM64FloatEmitter::FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
 	EmitThreeSame(0, size >> 6, 0x1A, Rd, Rn, Rm);
 }
-void ARM64FloatEmitter::FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+void ARM64FloatEmitter::FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn, bool source_upper)
 {
-	Emit2RegMisc(0, size >> 6, 0x17, Rd, Rn);
+	Emit2RegMisc(source_upper, 0, size >> 6, 0x17, Rd, Rn);
 }
 void ARM64FloatEmitter::FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(0, dest_size >> 5, 0x16, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 0, dest_size >> 5, 0x16, Rd, Rn);
 }
 void ARM64FloatEmitter::FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(0, 2 | (size >> 6), 0x1B, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0x1B, Rd, Rn);
 }
 void ARM64FloatEmitter::FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(1, 2 | (size >> 6), 0x1B, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1B, Rd, Rn);
 }
 void ARM64FloatEmitter::FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
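A hypothetical caller-side sketch of the new source_upper flag (register choices are illustrative, not from the commit): setting it sets the Q bit in the encoding above, which turns FCVTL into FCVTL2 and widens the upper four half-precision lanes instead of the lower four.

fp.FCVTL(32, Q0, D1);        // FCVTL  v0.4s, v1.4h - widen the low four halves
fp.FCVTL(32, Q0, Q1, true);  // FCVTL2 v0.4s, v1.8h - widen the upper four halves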
@@ -2818,11 +2817,11 @@ void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 }
 void ARM64FloatEmitter::FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(1, 2 | (size >> 6), 0xF, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xF, Rd, Rn);
 }
 void ARM64FloatEmitter::FRSQRTE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(1, 2 | (size >> 6), 0x1D, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1D, Rd, Rn);
 }
 void ARM64FloatEmitter::FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
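With the Q flag now taken from the destination register at each call site, the same wrapper emits either the 64-bit or the 128-bit form of the operation. An illustrative pair of calls (registers chosen as examples, not taken from the commit):

fp.FNEG(32, D0, D1);   // fneg v0.2s, v1.2s  (Q = 0: two 32-bit lanes)
fp.FNEG(32, Q0, Q1);   // fneg v0.4s, v1.4s  (Q = 1: four 32-bit lanes)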
@@ -2830,7 +2829,7 @@ void ARM64FloatEmitter::FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 }
 void ARM64FloatEmitter::NOT(ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(1, 0, 5, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 1, 0, 5, Rd, Rn);
 }
 void ARM64FloatEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
@@ -2838,28 +2837,28 @@ void ARM64FloatEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 }
 void ARM64FloatEmitter::REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(0, size >> 4, 1, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 1, Rd, Rn);
 }
 void ARM64FloatEmitter::REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(1, size >> 4, 0, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 1, size >> 4, 0, Rd, Rn);
 }
 void ARM64FloatEmitter::REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(0, size >> 4, 0, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 0, Rd, Rn);
 }
 void ARM64FloatEmitter::SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(0, size >> 6, 0x1D, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 0, size >> 6, 0x1D, Rd, Rn);
 }
 void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(1, size >> 6, 0x1D, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 1, size >> 6, 0x1D, Rd, Rn);
 }
 
 void ARM64FloatEmitter::XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(0, dest_size >> 4, 0x12, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 0, dest_size >> 4, 0x12, Rd, Rn);
 }
 
 // Move
@@ -3096,7 +3095,7 @@ void ARM64FloatEmitter::FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 }
 void ARM64FloatEmitter::FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(0, 2 | (size >> 6), 0x1D, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0x1D, Rd, Rn);
 }
 void ARM64FloatEmitter::FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
@@ -3104,7 +3103,7 @@ void ARM64FloatEmitter::FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 }
 void ARM64FloatEmitter::FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(1, 2 | (size >> 6), 0x1C, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1C, Rd, Rn);
 }
 void ARM64FloatEmitter::FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
@@ -3112,15 +3111,15 @@ void ARM64FloatEmitter::FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 }
 void ARM64FloatEmitter::FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(0, 2 | (size >> 6), 0x0C, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0x0C, Rd, Rn);
 }
 void ARM64FloatEmitter::FCMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(1, 2 | (size >> 6), 0xD, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xD, Rd, Rn);
 }
 void ARM64FloatEmitter::FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(0, 2 | (size >> 6), 0xE, Rd, Rn);
+	Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xE, Rd, Rn);
 }
 
 void ARM64FloatEmitter::FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
 
@@ -796,7 +796,7 @@ public:
 	void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
 	void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
 	void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
-	void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+	void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn, bool source_upper = false);
 	void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
 	void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
 	void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
@@ -889,7 +889,7 @@ private:
 	void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
 	void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
 	void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
-	void Emit2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
+	void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
 	void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn);
 	void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
 	void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
 
@@ -2151,9 +2151,6 @@ namespace MIPSComp
 	MOVI2F(S0, 1.0f, SCRATCHREG1);
 	for (int i = 0; i < n; ++i) {
 		fpr.MapDirtyInV(tempregs[i], sregs[i]);
-		// Let's do it integer registers for now. NEON later.
-		// There's gotta be a shorter way, can't find one though that takes
-		// care of NaNs like the interpreter (ignores them and just operates on the bits).
 		VSUB(fpr.V(tempregs[i]), S0, fpr.V(sregs[i]));
 	}
 
@@ -851,11 +851,93 @@ namespace MIPSComp
 }
 
 void Arm64Jit::Comp_Vi2f(MIPSOpcode op) {
-	DISABLE;
+	CONDITIONAL_DISABLE;
+	if (js.HasUnknownPrefix()) {
+		DISABLE;
+	}
+
+	VectorSize sz = GetVecSize(op);
+	int n = GetNumVectorElements(sz);
+
+	int imm = (op >> 16) & 0x1f;
+	const float mult = 1.0f / (float)(1UL << imm);
+
+	u8 sregs[4], dregs[4];
+	GetVectorRegsPrefixS(sregs, sz, _VS);
+	GetVectorRegsPrefixD(dregs, sz, _VD);
+
+	MIPSReg tempregs[4];
+	for (int i = 0; i < n; ++i) {
+		if (!IsOverlapSafe(dregs[i], i, n, sregs)) {
+			tempregs[i] = fpr.GetTempV();
+		} else {
+			tempregs[i] = dregs[i];
+		}
+	}
+
+	if (mult != 1.0f)
+		fp.MOVI2F(S0, mult, SCRATCH1);
+
+	// TODO: Use the SCVTF with builtin scaling where possible.
+	for (int i = 0; i < n; i++) {
+		fpr.MapDirtyInV(tempregs[i], sregs[i]);
+		fp.SCVTF(fpr.V(tempregs[i]), fpr.V(sregs[i]));
+		if (mult != 1.0f)
+			fp.FMUL(fpr.V(tempregs[i]), fpr.V(tempregs[i]), S0);
+	}
+
+	for (int i = 0; i < n; ++i) {
+		if (dregs[i] != tempregs[i]) {
+			fpr.MapDirtyInV(dregs[i], tempregs[i]);
+			fp.FMOV(fpr.V(dregs[i]), fpr.V(tempregs[i]));
+		}
+	}
+
+	ApplyPrefixD(dregs, sz);
+	fpr.ReleaseSpillLocksAndDiscardTemps();
 }
 
 void Arm64Jit::Comp_Vh2f(MIPSOpcode op) {
-	DISABLE;
+	CONDITIONAL_DISABLE;
+	if (js.HasUnknownPrefix()) {
+		DISABLE;
+	}
+
+	u8 sregs[4], dregs[4];
+	VectorSize sz = GetVecSize(op);
+	VectorSize outSz;
+
+	switch (sz) {
+	case V_Single:
+		outSz = V_Pair;
+		break;
+	case V_Pair:
+		outSz = V_Quad;
+		break;
+	default:
+		DISABLE;
+	}
+
+	int n = GetNumVectorElements(sz);
+	int nOut = n * 2;
+	GetVectorRegsPrefixS(sregs, sz, _VS);
+	GetVectorRegsPrefixD(dregs, outSz, _VD);
+
+	// Take the single registers and combine them to a D register.
+	for (int i = 0; i < n; i++) {
+		fpr.MapRegV(sregs[i], sz);
+		fp.INS(32, Q0, i, fpr.V(sregs[i]), 0);
+	}
+	// Convert four 16-bit floats in D0 to four 32-bit floats in Q0 (even if we only have two...)
+	fp.FCVTL(32, Q0, D0);
+	// Split apart again.
+	for (int i = 0; i < nOut; i++) {
+		fpr.MapRegV(dregs[i], MAP_DIRTY | MAP_NOINIT);
+		fp.INS(32, fpr.V(dregs[i]), 0, Q0, i);
+	}
+
+	ApplyPrefixD(dregs, sz);
+	fpr.ReleaseSpillLocksAndDiscardTemps();
 }
 
 void Arm64Jit::Comp_Vf2i(MIPSOpcode op) {
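As a reference for what the emitted sequences are meant to compute (a scalar sketch, not the JIT path; helper names are invented for illustration): Comp_Vi2f scales each integer lane by 2^-imm exactly as the mult constant above does, and Comp_Vh2f relies on FCVTL performing a standard IEEE half-to-single widening on each 16-bit lane.

#include <cstdint>
#include <cstring>

// Reference for Comp_Vi2f: convert a fixed-point integer lane to float,
// scaled down by 2^imm (imm is the 5-bit field decoded above).
static float Vi2fReference(int32_t lane, uint32_t imm) {
	return (float)lane * (1.0f / (float)(1UL << imm));
}

// Reference for one lane of FCVTL as used in Comp_Vh2f: IEEE 754
// half-precision to single-precision widening.
static float HalfToFloatReference(uint16_t h) {
	uint32_t sign = (uint32_t)(h & 0x8000) << 16;
	uint32_t exp = (h >> 10) & 0x1F;
	uint32_t man = h & 0x3FF;
	uint32_t bits;
	if (exp == 0x1F) {
		bits = sign | 0x7F800000u | (man << 13);  // Inf / NaN (payload kept)
	} else if (exp != 0) {
		bits = sign | ((exp + 112) << 23) | (man << 13);  // normal: rebias 15 -> 127
	} else if (man != 0) {
		exp = 113;  // subnormal half: normalize the mantissa
		while ((man & 0x400) == 0) {
			man <<= 1;
			exp--;
		}
		bits = sign | (exp << 23) | ((man & 0x3FF) << 13);
	} else {
		bits = sign;  // signed zero
	}
	float f;
	std::memcpy(&f, &bits, sizeof(f));
	return f;
}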
@@ -1679,9 +1761,6 @@ namespace MIPSComp
 	fp.MOVI2F(S0, 1.0f, SCRATCH1);
 	for (int i = 0; i < n; ++i) {
 		fpr.MapDirtyInV(tempregs[i], sregs[i]);
-		// Let's do it integer registers for now. NEON later.
-		// There's gotta be a shorter way, can't find one though that takes
-		// care of NaNs like the interpreter (ignores them and just operates on the bits).
 		fp.FSUB(fpr.V(tempregs[i]), S0, fpr.V(sregs[i]));
 	}
 
@@ -428,7 +428,8 @@ static void FPandASIMD1(uint32_t w, uint64_t addr, Instruction *instr) {
 		break;
 	case 2:
 		if (((w >> 17) & 0xf) == 0) {
-			snprintf(instr->text, sizeof(instr->text), "(asimd two-reg misc %08x)", w);
+			// Very similar to scalar two-reg misc. can we share code?
+			snprintf(instr->text, sizeof(instr->text), "(asimd vector two-reg misc %08x)", w);
 		} else if (((w >> 17) & 0xf) == 1) {
 			snprintf(instr->text, sizeof(instr->text), "(asimd across lanes %08x)", w);
 		} else {
@@ -463,7 +464,7 @@ static void FPandASIMD1(uint32_t w, uint64_t addr, Instruction *instr) {
 	}
 	int index;
 	if ((size & 1) == 0) {
-		index = (H << 1) | L;
+		index = (H << 1) | (int)L;
 	} else {
 		index = H;
 	}
 
@@ -39,10 +39,12 @@ bool TestArm64Emitter() {
 
 	//emitter.EXTR(W1, W3, 0, 7);
 	//RET(CheckLast(emitter, "53033061 extr w1, w3, w7"));
+	//fp.FCVTL(32, Q6, D25);
+	//RET(CheckLast(emitter, "4fa29820 fcvtl q6, d25"));  // A real disasm says fmla v0.2s, v1.2s, v2.s[1] but I think our way is more readable
 	fp.FMUL(32, Q0, Q1, Q2, 3);
 	RET(CheckLast(emitter, "4fa29820 fmul q0, q1, q2.4s[3]"));  // A real disasm says fmla v0.2s, v1.2s, v2.s[1] but I think our way is more readable
 	fp.FMLA(32, D0, D1, D2, 1);
-	RET(CheckLast(emitter, "1e222c20 fmla d0, d1, d2.2s[1]"));
+	RET(CheckLast(emitter, "0fa21020 fmla d0, d1, d2.2s[1]"));
 	fp.FCSEL(S0, S1, S2, CC_CS);
 	RET(CheckLast(emitter, "1e222c20 fcsel s0, s1, s2, cs"));
 	float value = 1.0;
@@ -52,7 +54,7 @@ bool TestArm64Emitter() {
 	RET(CheckLast(emitter, "1e2e1007 fmov s7, #1.000000"));
 	FPImm8FromFloat(-value, &imm8);
 	fp.FMOV(S7, imm8);
-	RET(CheckLast(emitter, "0fa21020 fmov s7, #-1.000000"));
+	RET(CheckLast(emitter, "1e3e1007 fmov s7, #-1.000000"));
 	fp.FMADD(S1, S2, S3, S4);
 	RET(CheckLast(emitter, "1f031041 fmadd s1, s2, s3, s4"));
 	fp.FNMSUB(D1, D2, D3, D4);