mirror of
https://github.com/ptitSeb/box64.git
synced 2024-11-27 08:40:59 +00:00
[RV64_DYNAREC] Added more opcodes (#712)
* [RV64_DYNAREC] Added 64 33 XOR opcode
* [RV64_DYNAREC] Added 0F C8-CF BSWAP opcode
* [RV64_DYNAREC] Added 66 0F 3A 0B ROUNDSD opcode
* [RV64_DYNAREC] Added F3 0F BC TZCNT opcode
* [RV64_DYNAREC] Added F3 0F E6 CVTDQ2PD opcode
* [RV64_DYNAREC] Added F3 0F 5B CVTTPS2DQ opcode
* [RV64_DYNAREC] Fixed CVTTPS2DQ, CVTDQ2PD and printer
This commit is contained in:
parent
008ef41261
commit
6f29d2a5d8
@ -1003,6 +1003,68 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
|
||||
SW(x5, gback, 2*4);
|
||||
SW(x6, gback, 3*4);
|
||||
break;
|
||||
|
||||
case 0xC8:
|
||||
case 0xC9:
|
||||
case 0xCA:
|
||||
case 0xCB:
|
||||
case 0xCC:
|
||||
case 0xCD:
|
||||
case 0xCE:
|
||||
case 0xCF: /* BSWAP reg */
|
||||
INST_NAME("BSWAP Reg");
|
||||
gd = xRAX+(opcode&7)+(rex.b<<3);
|
||||
MOV_U12(x1, 0xff);
|
||||
SLLI(x4, x1, 8); // mask 0xff00
|
||||
if (rex.w) {
|
||||
SLLI(x5, x1, 16); // mask 0xff0000
|
||||
SLLI(x6, x1, 24); // mask 0xff000000
|
||||
|
||||
SRLI(x2, gd, 56);
|
||||
|
||||
SRLI(x3, gd, 40);
|
||||
AND(x3, x3, x4);
|
||||
OR(x2, x2, x3);
|
||||
|
||||
SRLI(x3, gd, 24);
|
||||
AND(x3, x3, x5);
|
||||
OR(x2, x2, x3);
|
||||
|
||||
SRLI(x3, gd, 8);
|
||||
AND(x3, x3, x6);
|
||||
OR(x2, x2, x3);
|
||||
|
||||
AND(x3, gd, x6);
|
||||
SLLI(x3, x3, 8);
|
||||
OR(x2, x2, x3);
|
||||
|
||||
AND(x3, gd, x5);
|
||||
SLLI(x3, x3, 24);
|
||||
OR(x2, x2, x3);
|
||||
|
||||
AND(x3, gd, x4);
|
||||
SLLI(x3, x3, 40);
|
||||
OR(x2, x2, x3);
|
||||
|
||||
SLLI(x3, x3, 56);
|
||||
OR(gd, x2, x3);
|
||||
} else {
|
||||
SRLIW(x2, gd, 24);
|
||||
|
||||
SRLIW(x3, gd, 8);
|
||||
AND(x3, x3, x4);
|
||||
OR(x2, x2, x3);
|
||||
|
||||
AND(x3, gd, x4);
|
||||
SLLI(x3, x3, 8);
|
||||
OR(x2, x2, x3);
|
||||
|
||||
AND(x3, gd, x1);
|
||||
SLLI(x3, x3, 24);
|
||||
OR(gd, x2, x3);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
DEFAULT;
|
||||
}
|
||||
|
@ -73,6 +73,17 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
|
||||
GETEDO(x4, 0, x5);
|
||||
emit_sub32(dyn, ninst, rex, gd, ed, x3, x4, x5);
|
||||
break;
|
||||
|
||||
// case 0x33:
|
||||
// INST_NAME("XOR Gd, Seg:Ed");
|
||||
// SETFLAGS(X_ALL, SF_SET_PENDING);
|
||||
// grab_segdata(dyn, addr, ninst, x4, seg);
|
||||
// nextop = F8;
|
||||
// GETGD;
|
||||
// GETEDO(x4, 0, x5);
|
||||
// emit_xor32(dyn, ninst, rex, gd, ed, x3, x4);
|
||||
// break;
|
||||
|
||||
case 0x88:
|
||||
INST_NAME("MOV Seg:Eb, Gb");
|
||||
grab_segdata(dyn, addr, ninst, x4, seg);
|
||||
@ -130,6 +141,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
|
||||
SMWRITE2();
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x8B:
|
||||
INST_NAME("MOV Gd, Seg:Ed");
|
||||
grab_segdata(dyn, addr, ninst, x4, seg);
|
||||
|
@ -47,6 +47,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
|
||||
MAYUSE(eb1);
|
||||
MAYUSE(eb2);
|
||||
MAYUSE(j64);
|
||||
|
||||
// Rounding-mode lookup indexed by a 2-bit value (used below as
// round_round[u8&3] with the ROUNDSD immediate); each entry is the RD_*
// constant passed as the rounding-mode argument of the FCVT* emitters.
// NOTE(review): order presumably follows the x86 imm8 rounding-control
// encoding (nearest, down, up, toward zero) - confirm against the ISA manual.
static const int8_t round_round[] = { RD_RNE, RD_RDN, RD_RUP, RD_RTZ };
|
||||
|
||||
switch(opcode) {
|
||||
case 0x10:
|
||||
@ -242,6 +244,32 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
|
||||
DEFAULT;
|
||||
}
|
||||
break;
|
||||
case 0x3A: // these are some more SSSE3+ opcodes
|
||||
opcode = F8;
|
||||
switch(opcode) {
|
||||
case 0x0B:
|
||||
INST_NAME("ROUNDSD Gx, Ex, Ib");
|
||||
nextop = F8;
|
||||
GETGX(x1);
|
||||
GETEXSD(d0, 0);
|
||||
u8 = F8;
|
||||
v1 = fpu_get_scratch(dyn);
|
||||
if(u8&4) {
|
||||
u8 = sse_setround(dyn, ninst, x4, x2);
|
||||
FCVTLD(x5, d0, RD_DYN);
|
||||
FCVTDL(v1, x5, RD_DYN);
|
||||
x87_restoreround(dyn, ninst, u8);
|
||||
} else {
|
||||
FCVTLD(x5, d0, round_round[u8&3]);
|
||||
FCVTDL(v1, x5, round_round[u8&3]);
|
||||
}
|
||||
FSD(v1, gback, 0);
|
||||
break;
|
||||
default:
|
||||
DEFAULT;
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x54:
|
||||
INST_NAME("ANDPD Gx, Ex");
|
||||
nextop = F8;
|
||||
|
@ -224,7 +224,58 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
|
||||
SSE_LOOP_MV_Q2(x3);
|
||||
if(!MODREG) SMWRITE2();
|
||||
break;
|
||||
|
||||
|
||||
// CVTTPS2DQ Gx, Ex: convert the four single-precision values at Ex into four
// signed 32-bit integers stored at Gx. All four inputs are loaded and
// converted before the first store, so the stores cannot overwrite an input
// that has not been read yet.
// NOTE(review): GETEX/GETGX are assumed to leave the operand addresses in
// x5 and x6 respectively - confirm against the helper macros.
case 0x5B:
|
||||
INST_NAME("CVTTPS2DQ Gx, Ex");
|
||||
nextop = F8;
|
||||
GETEX(x5, 0) ;
|
||||
GETGX(x6);
|
||||
v0 = fpu_get_scratch(dyn); // four FP scratch registers, one per lane
|
||||
v1 = fpu_get_scratch(dyn);
|
||||
q0 = fpu_get_scratch(dyn);
|
||||
q1 = fpu_get_scratch(dyn);
|
||||
FLW(v0, x5, 0); // load the four single-precision lanes (offsets 0, 4, 8, 12)
|
||||
FLW(v1, x5, 4);
|
||||
FLW(q0, x5, 8);
|
||||
FLW(q1, x5, 12);
|
||||
FCVTWS(x1, v0, RD_RTZ); // single -> signed 32-bit, rounding mode RD_RTZ
|
||||
FCVTWS(x2, v1, RD_RTZ);
|
||||
FCVTWS(x3, q0, RD_RTZ);
|
||||
FCVTWS(x4, q1, RD_RTZ);
|
||||
SW(x1, x6, 0); // store the four 32-bit results at the same lane offsets
|
||||
SW(x2, x6, 4);
|
||||
SW(x3, x6, 8);
|
||||
SW(x4, x6, 12);
|
||||
break;
|
||||
case 0xBC:
|
||||
INST_NAME("TZCNT Gd, Ed");
|
||||
SETFLAGS(X_ZF, SF_SUBSET);
|
||||
SET_DFNONE();
|
||||
nextop = F8;
|
||||
GETED(0);
|
||||
GETGD;
|
||||
if(!rex.w && MODREG) {
|
||||
AND(x4, ed, xMASK);
|
||||
ed = x4;
|
||||
}
|
||||
BNE_MARK(ed, xZR);
|
||||
ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF)));
|
||||
ORI(xFlags, xFlags, 1<<F_CF);
|
||||
MOV32w(gd, rex.w?64:32);
|
||||
B_NEXT_nocond;
|
||||
MARK;
|
||||
NEG(x2, ed);
|
||||
AND(x2, x2, ed);
|
||||
TABLE64(x3, 0x03f79d71b4ca8b09ULL);
|
||||
MUL(x2, x2, x3);
|
||||
SRLI(x2, x2, 64-6);
|
||||
TABLE64(x1, (uintptr_t)&deBruijn64tab);
|
||||
ADD(x1, x1, x2);
|
||||
LBU(gd, x1, 0);
|
||||
ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF)));
|
||||
BNE(gd, xZR, 4+4);
|
||||
ORI(xFlags, xFlags, 1<<F_ZF);
|
||||
break;
|
||||
case 0xBD:
|
||||
INST_NAME("LZCNT Gd, Ed");
|
||||
SETFLAGS(X_ZF|X_CF, SF_SUBSET);
|
||||
@ -326,6 +377,22 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
|
||||
NEG(x2, x2);
|
||||
FMVWX(d0, x2);
|
||||
break;
|
||||
|
||||
// CVTDQ2PD Gx, Ex: convert the two 32-bit integers at Ex (offsets 0 and 4)
// into two doubles stored at Gx (offsets 0 and 8). Both source words are
// loaded before the first store, so the first 8-byte store cannot clobber
// the second source word when the operands overlap.
// NOTE(review): GETEX/GETGX are assumed to leave the operand addresses in
// x1 and x2 respectively - confirm against the helper macros.
case 0xE6:
|
||||
INST_NAME("CVTDQ2PD Gx, Ex");
|
||||
nextop = F8;
|
||||
GETEX(x1, 0);
|
||||
GETGX(x2);
|
||||
q0 = fpu_get_scratch(dyn); // FP scratch registers for the two results
|
||||
q1 = fpu_get_scratch(dyn);
|
||||
LW(x3, x1, 0); // load both 32-bit source integers
|
||||
LW(x4, x1, 4);
|
||||
FCVTDW(q0, x3, RD_DYN); // presumably signed 32-bit -> double (fcvt.d.w); confirm
|
||||
FCVTDW(q1, x4, RD_DYN);
|
||||
FSD(q0, x2, 0); // store the two doubles into the destination
|
||||
FSD(q1, x2, 8);
|
||||
break;
|
||||
|
||||
default:
|
||||
DEFAULT;
|
||||
}
|
||||
|
@ -398,6 +398,7 @@ f28–31 ft8–11 FP temporaries Caller
|
||||
#define RD_RMM 0b100
|
||||
// In instruction’s rm field, selects dynamic rounding mode;
|
||||
#define RD_RM 0b111
|
||||
#define RD_DYN RD_RM
|
||||
|
||||
// load single precision from rs1+imm12 to frd
|
||||
#define FLW(frd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b010, frd, 0b0000111))
|
||||
@ -422,7 +423,7 @@ f28–31 ft8–11 FP temporaries Caller
|
||||
// Convert from signed 32bits to Single
|
||||
#define FCVTSW(frd, rs1, rm) EMIT(R_type(0b1101000, 0b00000, rs1, rm, frd, 0b1010011))
|
||||
// Convert from Single to signed 32bits (truncated)
|
||||
#define FCVTWS(rd, frs1, tm) EMIT(R_type(0b1100000, 0b00000, frs1, rm, rd, 0b1010011))
|
||||
#define FCVTWS(rd, frs1, rm) EMIT(R_type(0b1100000, 0b00000, frs1, rm, rd, 0b1010011))
|
||||
|
||||
#define FADDS(frd, frs1, frs2) EMIT(R_type(0b0000000, frs2, frs1, 0b000, frd, 0b1010011))
|
||||
#define FSUBS(frd, frs1, frs2) EMIT(R_type(0b0000100, frs2, frs1, 0b000, frd, 0b1010011))
|
||||
|
@ -1315,7 +1315,7 @@ const char* rv64_print(uint32_t data, uintptr_t addr)
|
||||
insn.name = "fcvt.d.lu";
|
||||
break;
|
||||
}
|
||||
PRINT_xd_fs1();
|
||||
PRINT_fd_xs1();
|
||||
}
|
||||
case 0x70: {
|
||||
assert(RS2(data) == 0);
|
||||
|
Loading…
Reference in New Issue
Block a user