[DYNAREC] Added D9 opcodes

This commit is contained in:
ptitSeb 2021-04-03 11:43:17 +02:00
parent b239bec2c5
commit ae70f168a8
7 changed files with 370 additions and 7 deletions

View File

@ -287,7 +287,7 @@ if(ARM_DYNAREC)
"${BOX64_ROOT}/src/dynarec/dynarec_arm64_66.c"
"${BOX64_ROOT}/src/dynarec/dynarec_arm64_67.c"
#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_d8.c"
#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_d9.c"
"${BOX64_ROOT}/src/dynarec/dynarec_arm64_d9.c"
#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_da.c"
#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_db.c"
#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_dc.c"

View File

@ -670,6 +670,12 @@
// 128-bit vector store, register-offset addressing: Qt is the vector register to store,
// Rn the base register, Rm the offset register. The _LSL4 variant sets the extra flag
// argument of VMEM_REG_gen to 1 (presumably "shift Rm left by 4" - see the macro name).
// The register operand forwarded to VMEM_REG_gen must be the Qt parameter; the earlier
// spelling "Dt" was not a parameter of these macros.
#define VSTR128_REG(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 0, Rn, Qt))
#define VSTR128_REG_LSL4(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 1, Rn, Qt))
// Encoding helper for the literal form of a vector/FP register load.
// opc goes to bits 30-31, imm19 to bits 5-23 and Rt (target register) to bits 0-4.
#define VLDR_PC_gen(opc, imm19, Rt) ((opc)<<30 | 0b011<<27 | 1<<26 | (imm19)<<5 | (Rt))
// Size-specific wrappers (opc 0b00 / 0b01 / 0b10 for the 32 / 64 / 128 variants).
// The "imm19" argument passed here is shifted right by 2 and masked to 19 bits
// before being encoded, i.e. callers pass an offset in bytes, not in words.
#define VLDR32_literal(Vt, imm19) EMIT(VLDR_PC_gen(0b00, ((imm19)>>2)&0x7FFFF, Vt))
#define VLDR64_literal(Vt, imm19) EMIT(VLDR_PC_gen(0b01, ((imm19)>>2)&0x7FFFF, Vt))
#define VLDR128_literal(Vt, imm19) EMIT(VLDR_PC_gen(0b10, ((imm19)>>2)&0x7FFFF, Vt))
// Encoding helper for LD1R: Q at bit 30, element size at bits 10-11,
// Rn (base address register) at bits 5-9, Rt (target vector register) at bits 0-4.
#define LD1R_gen(Q, size, Rn, Rt) ((Q)<<30 | 0b0011010<<23 | 1<<22 | 0<<21 | 0b110<<13 | (size)<<10 | (Rn)<<5 | (Rt))
// Q=1 wrappers; size 0b00 is used for the _8 variant and 0b01 for the _16 variant.
#define VLDQ1R_8(Vt, Rn) EMIT(LD1R_gen(1, 0b00, Rn, Vt))
#define VLDQ1R_16(Vt, Rn) EMIT(LD1R_gen(1, 0b01, Rn, Vt))
@ -878,6 +884,15 @@
// FADDP wrappers with the first FADDP_vector argument set to 1; the second argument is
// 0 for the "S" variant and 1 for the "D" variant (presumably single/double - confirm
// against FADDP_vector). Note the generator takes Vm before Vn.
#define VFADDPQS(Vd, Vn, Vm) EMIT(FADDP_vector(1, 0, Vm, Vn, Vd))
#define VFADDPQD(Vd, Vn, Vm) EMIT(FADDP_vector(1, 1, Vm, Vn, Vd))
// NEG / ABS
// Shared encoding for the scalar FNEG and FABS instructions.
// type (bits 22-23) is 0b00 for the S wrappers and 0b01 for the D wrappers;
// opc (from bit 15) is 0b10 for FNEG and 0b01 for FABS.
// Rn is the source register (bits 5-9), Rd the destination (bits 0-4).
#define FNEGABS_scalar(type, opc, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (opc)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd))
#define FNEGS(Sd, Sn) EMIT(FNEGABS_scalar(0b00, 0b10, Sn, Sd))
#define FNEGD(Dd, Dn) EMIT(FNEGABS_scalar(0b01, 0b10, Dn, Dd))
#define FABSS(Sd, Sn) EMIT(FNEGABS_scalar(0b00, 0b01, Sn, Sd))
#define FABSD(Dd, Dn) EMIT(FNEGABS_scalar(0b01, 0b01, Dn, Dd))
// MUL
// Encoding helper for the vector FMUL: Q at bit 30, sz at bit 22,
// Rm at bits 16-20, Rn at bits 5-9, Rd at bits 0-4.
#define FMUL_vector(Q, sz, Rm, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11011<<11 | 1<<10 | (Rn)<<5 | (Rd))
// Q=0, sz=0 form. Note the generator takes Rm before Rn, so the argument order is reversed here.
#define VFMULS(Sd, Sn, Sm) EMIT(FMUL_vector(0, 0, Sm, Sn, Sd))

View File

@ -1828,7 +1828,11 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
break;
}
break;
case 0xD9:
addr = dynarec64_D9(dyn, addr, ip, ninst, rex, rep, ok, need_epilog);
break;
case 0xE8:
INST_NAME("CALL Id");
i32 = F32S;

View File

@ -0,0 +1,339 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <errno.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_arm64.h"
#include "dynarec_arm64_private.h"
#include "arm64_printer.h"
#include "emu/x87emu_private.h"
#include "dynarec_arm64_helper.h"
#include "dynarec_arm64_functions.h"
/*
 * Dynarec handler for the x87 escape opcode 0xD9.
 *
 * Reads the byte following the opcode (nextop) and either:
 *  - handles it as a register/constant form when it matches one of the explicit
 *    0xC0..0xFF cases of the outer switch, or
 *  - treats it as a ModRM byte with a memory operand and dispatches on its
 *    reg field, (nextop>>3)&7, in the inner switch.
 * Forms with no handler go through DEFAULT.
 *
 * dyn, ninst : dynarec state and current instruction index, forwarded to every helper.
 * addr       : current decode address; updated by geted() and returned.
 * rex        : REX prefix information, forwarded to geted().
 * ip, rep, ok, need_epilog : not referenced directly in this body; presumably
 *              consumed by helper macros (F8, DEFAULT, CALL, ...) - confirm.
 *
 * Returns the decode address after this instruction.
 */
uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
{
    uint8_t nextop = F8;
    uint8_t ed;
    uint8_t wback, wb1;
    int fixedaddress;
    int v1, v2;
    int s0;
    int i1, i2, i3;

    MAYUSE(s0);
    MAYUSE(v2);
    MAYUSE(v1);

    switch(nextop) {
        case 0xC0:
        case 0xC1:
        case 0xC2:
        case 0xC3:
        case 0xC4:
        case 0xC5:
        case 0xC6:
        case 0xC7:
            INST_NAME("FLD STx");
            // fetch the source first, then push: the new ST0 receives a copy of it
            v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7);
            v2 = x87_do_push(dyn, ninst);
            FMOVD(v2, v1);
            break;

        case 0xC8:
        case 0xC9:
        case 0xCA:
        case 0xCB:
        case 0xCC:
        case 0xCD:
        case 0xCE:
        case 0xCF:
            INST_NAME("FXCH STx");
            // swap the cache value, not the double value itself :p
            i1 = x87_get_cache(dyn, ninst, x1, x2, nextop&7);
            i2 = x87_get_cache(dyn, ninst, x1, x2, 0);
            i3 = dyn->x87cache[i1];
            dyn->x87cache[i1] = dyn->x87cache[i2];
            dyn->x87cache[i2] = i3;
            break;

        case 0xD0:
            INST_NAME("FNOP");
            break;

        case 0xE0:
            INST_NAME("FCHS");
            v1 = x87_get_st(dyn, ninst, x1, x2, 0);
            FNEGD(v1, v1);
            break;
        case 0xE1:
            INST_NAME("FABS");
            v1 = x87_get_st(dyn, ninst, x1, x2, 0);
            FABSD(v1, v1);
            break;

        case 0xE4:
            INST_NAME("FTST");
            // compare ST0 against 0, then update the status word like an FCOM
            v1 = x87_get_st(dyn, ninst, x1, x2, 0);
            FCMPD_0(v1);
            FCOM(x1, x2, x3);   // same flags...
            break;
        case 0xE5:
            INST_NAME("FXAM");
            x87_refresh(dyn, ninst, x1, x2, 0);
            CALL(fpu_fxam, -1);  // should be possible inline, but is it worth it?
            break;

        // Constant loads: push a new ST0 and fill it from the 64-bit constant table
        case 0xE8:
            INST_NAME("FLD1");
            v1 = x87_do_push(dyn, ninst);
            FTABLE64(v1, 1.0);
            break;
        case 0xE9:
            INST_NAME("FLDL2T");
            v1 = x87_do_push(dyn, ninst);
            FTABLE64(v1, L2T);
            break;
        case 0xEA:
            INST_NAME("FLDL2E");
            v1 = x87_do_push(dyn, ninst);
            FTABLE64(v1, L2E);
            break;
        case 0xEB:
            INST_NAME("FLDPI");
            v1 = x87_do_push(dyn, ninst);
            FTABLE64(v1, PI);
            break;
        case 0xEC:
            INST_NAME("FLDLG2");
            v1 = x87_do_push(dyn, ninst);
            FTABLE64(v1, LG2);
            break;
        case 0xED:
            INST_NAME("FLDLN2");
            v1 = x87_do_push(dyn, ninst);
            FTABLE64(v1, LN2);
            break;
        case 0xEE:
            INST_NAME("FLDZ");
            v1 = x87_do_push(dyn, ninst);
            FTABLE64(v1, 0.0);
            break;

        case 0xFA:
            INST_NAME("FSQRT");
            v1 = x87_get_st(dyn, ninst, x1, x2, 0);
            FSQRTD(v1, v1);
            break;

        case 0xFC:
            INST_NAME("FRNDINT");
            // use C helper for now, nothing straightforward is available
            x87_forget(dyn, ninst, x1, x2, 0);
            CALL(arm_frndint, -1);
            /* Disabled inline variant, kept for reference:
            v1 = x87_get_st(dyn, ninst, x1, x2, 0);
            VCMP_F64_0(v1);
            VMRS_APSR();
            B_NEXT(cVS);    // Unordered, skip
            B_NEXT(cEQ);    // Zero, skip
            u8 = x87_setround(dyn, ninst, x1, x2, x3);
            VCVT_S32_F64(x1, v1);    // limit to 32bits....
            VCVT_F64_S32(v1, x1);
            x87_restoreround(dyn, ninst, u8);
            */
            break;

        // The forms below are delegated to C helpers. The stack slots involved are
        // dropped from the register cache first with x87_forget().
        case 0xF0:
            INST_NAME("F2XM1");
            x87_forget(dyn, ninst, x1, x2, 0);
            CALL(arm_f2xm1, -1);
            break;
        case 0xF1:
            INST_NAME("FYL2X");
            x87_forget(dyn, ninst, x1, x2, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(arm_fyl2x, -1);
            x87_do_pop(dyn, ninst);
            break;
        case 0xF2:
            INST_NAME("FTAN");
            x87_forget(dyn, ninst, x1, x2, 0);
            CALL(arm_ftan, -1);
            // after the helper, push 1.0 on top of the result
            v1 = x87_do_push(dyn, ninst);
            FTABLE64(v1, 1.0);
            break;
        case 0xF3:
            INST_NAME("FPATAN");
            x87_forget(dyn, ninst, x1, x2, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(arm_fpatan, -1);
            x87_do_pop(dyn, ninst);
            break;
        case 0xF4:
            INST_NAME("FXTRACT");
            // push an empty slot first; the previous ST0 is then at index 1
            x87_do_push_empty(dyn, ninst, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(arm_fxtract, -1);
            break;
        case 0xF5:
            INST_NAME("FPREM1");
            x87_forget(dyn, ninst, x1, x2, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(arm_fprem1, -1);
            break;
        case 0xF6:
            INST_NAME("FDECSTP");
            // purge the cache, then top = (top - 1) & 7 directly in the emu structure
            fpu_purgecache(dyn, ninst, x1, x2, x3);
            LDRw_U12(x2, xEmu, offsetof(x64emu_t, top));
            SUBw_U12(x2, x2, 1);
            ANDw_mask(x2, x2, 0, 2);    //mask=7
            STRw_U12(x2, xEmu, offsetof(x64emu_t, top));
            break;
        case 0xF7:
            INST_NAME("FINCSTP");
            // purge the cache, then top = (top + 1) & 7 directly in the emu structure
            fpu_purgecache(dyn, ninst, x1, x2, x3);
            LDRw_U12(x2, xEmu, offsetof(x64emu_t, top));
            ADDw_U12(x2, x2, 1);
            ANDw_mask(x2, x2, 0, 2);    //mask=7
            STRw_U12(x2, xEmu, offsetof(x64emu_t, top));
            break;
        case 0xF8:
            INST_NAME("FPREM");
            x87_forget(dyn, ninst, x1, x2, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(arm_fprem, -1);
            break;
        case 0xF9:
            INST_NAME("FYL2XP1");
            x87_forget(dyn, ninst, x1, x2, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(arm_fyl2xp1, -1);
            x87_do_pop(dyn, ninst);
            break;
        case 0xFB:
            INST_NAME("FSINCOS");
            // push an empty slot first; the previous ST0 is then at index 1
            x87_do_push_empty(dyn, ninst, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(arm_fsincos, -1);
            break;
        case 0xFD:
            INST_NAME("FSCALE");
            x87_forget(dyn, ninst, x1, x2, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(arm_fscale, -1);
            break;
        case 0xFE:
            INST_NAME("FSIN");
            x87_forget(dyn, ninst, x1, x2, 0);
            CALL(arm_fsin, -1);
            break;
        case 0xFF:
            INST_NAME("FCOS");
            x87_forget(dyn, ninst, x1, x2, 0);
            CALL(arm_fcos, -1);
            break;

        // Register-form bytes (mod==3) with no handler. 0xD2 and 0xD3 must be listed
        // here too: otherwise they reach the memory-operand decoder below with a reg
        // field of 2 and would be translated as "FST float[ED]".
        case 0xD1:
        case 0xD2:
        case 0xD3:
        case 0xD4:
        case 0xD5:
        case 0xD6:
        case 0xD7:
        case 0xD8:
        case 0xD9:
        case 0xDA:
        case 0xDB:
        case 0xDC:
        case 0xDD:
        case 0xDE:
        case 0xDF:
        case 0xE2:
        case 0xE3:
        case 0xE6:
        case 0xE7:
        case 0xEF:
            DEFAULT;
            break;

        default:
            // Memory-operand forms: dispatch on the reg field of the ModRM byte
            switch((nextop>>3)&7) {
                case 0:
                    INST_NAME("FLD ST0, float[ED]");
                    v1 = x87_do_push(dyn, ninst);
                    s0 = fpu_get_scratch(dyn);
                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0);
                    // load the 32-bit float, then widen it to double in the new ST0
                    VLDR32_U12(s0, ed, fixedaddress);
                    FCVT_D_S(v1, s0);
                    break;
                case 2:
                    INST_NAME("FST float[ED], ST0");
                    v1 = x87_get_st(dyn, ninst, x1, x2, 0);
                    s0 = fpu_get_scratch(dyn);
                    // narrow to single in a scratch register so ST0 itself is untouched
                    FCVT_S_D(s0, v1);
                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0);
                    VSTR32_U12(s0, ed, fixedaddress);
                    break;
                case 3:
                    INST_NAME("FSTP float[ED], ST0");
                    v1 = x87_get_st(dyn, ninst, x1, x2, 0);
                    s0 = fpu_get_scratch(dyn);
                    FCVT_S_D(s0, v1);
                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0);
                    VSTR32_U12(s0, ed, fixedaddress);
                    x87_do_pop(dyn, ninst);
                    break;
                case 4:
                    INST_NAME("FLDENV Ed");
                    fpu_purgecache(dyn, ninst, x1, x2, x3); // maybe only x87, not SSE?
                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0);
                    // the helper expects the operand address in x1 and 0 in x2
                    if(ed!=x1) {
                        MOVx_REG(x1, ed);
                    }
                    MOV32w(x2, 0);
                    CALL(fpu_loadenv, -1);
                    break;
                case 5:
                    INST_NAME("FLDCW Ew");
                    GETEW(x1, 0);
                    STRH_U12(x1, xEmu, offsetof(x64emu_t, cw));    // store the 16-bit control word in the emu structure
                    UBFXw(x1, x1, 10, 2);    // extract round
                    STRw_U12(x1, xEmu, offsetof(x64emu_t, round));
                    break;
                case 6:
                    INST_NAME("FNSTENV Ed");
                    fpu_purgecache(dyn, ninst, x1, x2, x3); // maybe only x87, not SSE?
                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0);
                    // the helper expects the operand address in x1 and 0 in x2
                    if(ed!=x1) {
                        MOVx_REG(x1, ed);
                    }
                    MOV32w(x2, 0);
                    CALL(fpu_savenv, -1);
                    break;
                case 7:
                    INST_NAME("FNSTCW Ew");
                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, 1, rex, 0, 0);
                    // ed / wb1 / wback are consumed by EWBACK (presumably wb1!=0 selects
                    // the store-to-memory form - confirm against the macro definition)
                    ed = x1;
                    wb1 = 1;
                    LDRH_U12(x1, xEmu, offsetof(x64emu_t, cw));
                    EWBACK;
                    break;
                default:
                    DEFAULT;
            }
    }
    return addr;
}

View File

@ -358,8 +358,8 @@
// Generate FCOM with s1 and s2 scratch regs (the VCMP is already done)
#define FCOM(s1, s2, s3) \
LDRH_U12(s3, xEmu, offsetof(x64emu_t, sw)); /*offset is 8bits right?*/\
MOV32w(s1, 0b01000111); \
BICw_REG_LSL(s3, s3, s1, 8); \
MOV32w(s1, 0b0100011100000000); \
BICw_REG(s3, s3, s1); \
CSETw(s1, cMI); /* 1 if less than, 0 else */ \
MOV32w(s2, 0b01000101); /* unordered */ \
CSELw(s1, s2, s1, cVS); \
@ -508,6 +508,9 @@
// Fallbacks: TABLE64 and FTABLE64 expand to nothing unless they were already
// defined before this point.
#ifndef TABLE64
#define TABLE64(A, V)
#endif
#ifndef FTABLE64
#define FTABLE64(A, V)
#endif
#if STEP < 2
#define GETIP(A)
@ -833,7 +836,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
//uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
//uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
// Handler for the x87 escape opcode 0xD9; returns the updated decode address.
uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
//uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
//uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
//uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);

View File

@ -7,4 +7,5 @@
#define INST_EPILOG dyn->insts[ninst].epilog = dyn->arm_size;
#define INST_NAME(name)
#define NEW_BARRIER_INST if(ninst) ++dyn->sons_size
#define TABLE64(A, V) if((V)>0xffffffffLL) {Table64(dyn, (V)); EMIT(0);} else {MOV64x(A, V);}
#define TABLE64(A, V) if((V)>0xffffffffLL) {Table64(dyn, (V)); EMIT(0);} else {MOV64x(A, V);}
// Floating-point constant load for this pass: registers the 64-bit pattern of the double V
// (read back through the .q member of mmx87_regs_t) with Table64() and emits a single 0 word
// as a placeholder for the load instruction. The target register A is not used here.
#define FTABLE64(A, V) {mmx87_regs_t v = {.d = V}; Table64(dyn, v.q); EMIT(0);}

View File

@ -32,4 +32,5 @@
++dyn->sons_size; \
}
#define TABLE64(A, V) if((V)>0xffffffffLL) {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, " Table64: 0x%lx\n", (V)); LDRx_literal(A, val64offset);} else {MOV64x(A, V);}
#define TABLE64(A, V) if((V)>0xffffffffLL) {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, " Table64: 0x%lx\n", (V)); LDRx_literal(A, val64offset);} else {MOV64x(A, V);}
// Floating-point constant load for this pass: registers the 64-bit pattern of the double V
// (read back through the .q member of mmx87_regs_t) with Table64(), logs the value, and emits
// a 64-bit literal load into vector register A using the offset returned by Table64().
#define FTABLE64(A, V) {mmx87_regs_t v = {.d = V}; int val64offset = Table64(dyn, v.q); MESSAGE(LOG_DUMP, " FTable64: %g\n", v.d); VLDR64_literal(A, val64offset);}