[RV64_DYNAREC] Added more opcodes (#712)

* [RV64_DYNAREC] Added 64 33 XOR opcode

* [RV64_DYNAREC] Added 0F C8-CF BSWAP opcode

* [RV64_DYNAREC] Added 66 0F 3A 0B ROUNDSD opcode

* [RV64_DYNAREC] Added F3 0F BC TZCNT opcode

* [RV64_DYNAREC] Added F3 0F E6 CVTDQ2PD opcode

* [RV64_DYNAREC] Added F3 0F 5B CVTTPS2DQ opcode

* [RV64_DYNAREC] Fixed CVTTPS2DQ, CVTDQ2PD and printer
This commit is contained in:
xctan 2023-04-19 16:11:22 +08:00 committed by GitHub
parent 008ef41261
commit 6f29d2a5d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 173 additions and 3 deletions

View File

@ -1003,6 +1003,68 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
SW(x5, gback, 2*4);
SW(x6, gback, 3*4);
break;
case 0xC8:
case 0xC9:
case 0xCA:
case 0xCB:
case 0xCC:
case 0xCD:
case 0xCE:
case 0xCF: /* BSWAP reg */
    // Reverse the byte order of the register selected by the low 3 opcode
    // bits (extended by REX.B). The swap is built from shift/mask/or steps:
    // x1, x4, x5, x6 hold byte masks, x2 accumulates the result and x3 is
    // the per-byte temporary.
    INST_NAME("BSWAP Reg");
    gd = xRAX+(opcode&7)+(rex.b<<3);
    MOV_U12(x1, 0xff);
    SLLI(x4, x1, 8); // mask 0xff00
    if (rex.w) {
        // 64-bit swap: byte i of the source goes to byte 7-i.
        SLLI(x5, x1, 16); // mask 0xff0000
        SLLI(x6, x1, 24); // mask 0xff000000
        SRLI(x2, gd, 56);       // byte 7 -> byte 0
        SRLI(x3, gd, 40);
        AND(x3, x3, x4);        // byte 6 -> byte 1
        OR(x2, x2, x3);
        SRLI(x3, gd, 24);
        AND(x3, x3, x5);        // byte 5 -> byte 2
        OR(x2, x2, x3);
        SRLI(x3, gd, 8);
        AND(x3, x3, x6);        // byte 4 -> byte 3
        OR(x2, x2, x3);
        AND(x3, gd, x6);
        SLLI(x3, x3, 8);        // byte 3 -> byte 4
        OR(x2, x2, x3);
        AND(x3, gd, x5);
        SLLI(x3, x3, 24);       // byte 2 -> byte 5
        OR(x2, x2, x3);
        AND(x3, gd, x4);
        SLLI(x3, x3, 40);       // byte 1 -> byte 6
        OR(x2, x2, x3);
        // byte 0 -> byte 7: shift the source register itself. Shifting x3
        // here would reuse the already-shifted byte-1 term and always
        // produce 0, leaving the top byte of the result empty.
        SLLI(x3, gd, 56);
        OR(gd, x2, x3);
    } else {
        // 32-bit swap: byte i of the low word goes to byte 3-i. Every term
        // is confined to the low 32 bits, so the upper half of gd ends up 0.
        SRLIW(x2, gd, 24);      // byte 3 -> byte 0
        SRLIW(x3, gd, 8);
        AND(x3, x3, x4);        // byte 2 -> byte 1
        OR(x2, x2, x3);
        AND(x3, gd, x4);
        SLLI(x3, x3, 8);        // byte 1 -> byte 2
        OR(x2, x2, x3);
        AND(x3, gd, x1);
        SLLI(x3, x3, 24);       // byte 0 -> byte 3
        OR(gd, x2, x3);
    }
    break;
default:
DEFAULT;
}

View File

@ -73,6 +73,17 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
GETEDO(x4, 0, x5);
emit_sub32(dyn, ninst, rex, gd, ed, x3, x4, x5);
break;
// NOTE(review): the 64 33 (XOR Gd, Seg:Ed) handler below is entirely commented
// out, so this opcode is not translated by this case even though the commit
// message lists it as added. The reason for disabling it is not visible here —
// confirm whether it should be enabled or removed.
// case 0x33:
// INST_NAME("XOR Gd, Seg:Ed");
// SETFLAGS(X_ALL, SF_SET_PENDING);
// grab_segdata(dyn, addr, ninst, x4, seg);
// nextop = F8;
// GETGD;
// GETEDO(x4, 0, x5);
// emit_xor32(dyn, ninst, rex, gd, ed, x3, x4);
// break;
case 0x88:
INST_NAME("MOV Seg:Eb, Gb");
grab_segdata(dyn, addr, ninst, x4, seg);
@ -130,6 +141,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
SMWRITE2();
}
break;
case 0x8B:
INST_NAME("MOV Gd, Seg:Ed");
grab_segdata(dyn, addr, ninst, x4, seg);

View File

@ -47,6 +47,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
MAYUSE(eb1);
MAYUSE(eb2);
MAYUSE(j64);
// Rounding-mode lookup used by ROUNDSD, indexed with the low two bits of its
// immediate (u8&3): 0 -> RD_RNE, 1 -> RD_RDN, 2 -> RD_RUP, 3 -> RD_RTZ
// (presumably nearest-even, down, up and toward zero, going by the RISC-V
// rounding-mode names).
static const int8_t round_round[] = { RD_RNE, RD_RDN, RD_RUP, RD_RTZ };
switch(opcode) {
case 0x10:
@ -242,6 +244,32 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
DEFAULT;
}
break;
case 0x3A: // these are some more SSSE3+ opcodes
    opcode = F8;
    switch(opcode) {
        case 0x0B:
            // ROUNDSD: round the low double of Ex to an integral value and
            // store it in the low 64 bits of Gx. The rounding is done with a
            // double -> int64 -> double round trip.
            // NOTE(review): that round trip cannot represent NaN, infinities
            // or magnitudes beyond the int64 range, and it drops the sign of
            // a zero result — confirm whether those inputs need a bypass path.
            INST_NAME("ROUNDSD Gx, Ex, Ib");
            nextop = F8;
            GETGX(x1);
            // NOTE(review): confirm GETEXSD does not use x1 as an address
            // scratch for memory operands, since gback was just set to x1.
            // One immediate byte (Ib) still follows the ModRM operand, so the
            // operand decoder is given a delta of 1; RIP-relative addresses
            // are relative to the end of the whole instruction.
            GETEXSD(d0, 1);
            u8 = F8;
            v1 = fpu_get_scratch(dyn);
            if(u8&4) {
                // Immediate bit 2 set: round with the dynamic rounding mode.
                // sse_setround returns a value that x87_restoreround takes
                // back afterwards (presumably the previous rounding state).
                u8 = sse_setround(dyn, ninst, x4, x2);
                FCVTLD(x5, d0, RD_DYN);
                FCVTDL(v1, x5, RD_DYN);
                x87_restoreround(dyn, ninst, u8);
            } else {
                // Otherwise the low two immediate bits pick a static mode.
                FCVTLD(x5, d0, round_round[u8&3]);
                FCVTDL(v1, x5, round_round[u8&3]);
            }
            FSD(v1, gback, 0);
            break;
        default:
            DEFAULT;
    }
    break;
case 0x54:
INST_NAME("ANDPD Gx, Ex");
nextop = F8;

View File

@ -224,7 +224,58 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
SSE_LOOP_MV_Q2(x3);
if(!MODREG) SMWRITE2();
break;
case 0x5B:
INST_NAME("CVTTPS2DQ Gx, Ex");
nextop = F8;
GETEX(x5, 0) ;
GETGX(x6);
v0 = fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
q0 = fpu_get_scratch(dyn);
q1 = fpu_get_scratch(dyn);
FLW(v0, x5, 0);
FLW(v1, x5, 4);
FLW(q0, x5, 8);
FLW(q1, x5, 12);
FCVTWS(x1, v0, RD_RTZ);
FCVTWS(x2, v1, RD_RTZ);
FCVTWS(x3, q0, RD_RTZ);
FCVTWS(x4, q1, RD_RTZ);
SW(x1, x6, 0);
SW(x2, x6, 4);
SW(x3, x6, 8);
SW(x4, x6, 12);
break;
case 0xBC:
// TZCNT: count the trailing zero bits of Ed into Gd.
// - Ed == 0: Gd receives the operand size (64 or 32), CF is set, ZF cleared.
// - Ed != 0: Gd receives the index of the lowest set bit, CF is cleared and
//   ZF is set only when that index is 0.
INST_NAME("TZCNT Gd, Ed");
// NOTE(review): only X_ZF is declared here although F_CF is written below;
// the neighbouring LZCNT case declares X_ZF|X_CF — confirm which is intended.
SETFLAGS(X_ZF, SF_SUBSET);
SET_DFNONE();
nextop = F8;
GETED(0);
GETGD;
if(!rex.w && MODREG) {
// 32-bit register source: work on a masked copy in x4 so the upper bits of
// the register are ignored (assumes xMASK holds 0xffffffff — TODO confirm).
AND(x4, ed, xMASK);
ed = x4;
}
BNE_MARK(ed, xZR);
// Zero source: clear ZF and CF, then set CF; result is the operand width.
ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF)));
ORI(xFlags, xFlags, 1<<F_CF);
MOV32w(gd, rex.w?64:32);
B_NEXT_nocond;
MARK;
// Non-zero source: isolate the lowest set bit (ed & -ed), multiply by the
// 64-bit de Bruijn constant and use the top 6 bits of the product as an
// index into deBruijn64tab to obtain the bit position.
NEG(x2, ed);
AND(x2, x2, ed);
TABLE64(x3, 0x03f79d71b4ca8b09ULL);
MUL(x2, x2, x3);
SRLI(x2, x2, 64-6);
TABLE64(x1, (uintptr_t)&deBruijn64tab);
ADD(x1, x1, x2);
LBU(gd, x1, 0);
ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF)));
// Skip the following ORI when the count is non-zero, so ZF is only set for a
// zero result (the 4+4 offset presumably spans this branch plus one
// instruction).
BNE(gd, xZR, 4+4);
ORI(xFlags, xFlags, 1<<F_ZF);
break;
case 0xBD:
INST_NAME("LZCNT Gd, Ed");
SETFLAGS(X_ZF|X_CF, SF_SUBSET);
@ -326,6 +377,22 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
NEG(x2, x2);
FMVWX(d0, x2);
break;
case 0xE6:
INST_NAME("CVTDQ2PD Gx, Ex");
nextop = F8;
GETEX(x1, 0);
GETGX(x2);
q0 = fpu_get_scratch(dyn);
q1 = fpu_get_scratch(dyn);
LW(x3, x1, 0);
LW(x4, x1, 4);
FCVTDW(q0, x3, RD_DYN);
FCVTDW(q1, x4, RD_DYN);
FSD(q0, x2, 0);
FSD(q1, x2, 8);
break;
default:
DEFAULT;
}

View File

@ -398,6 +398,7 @@ f2831 ft811 FP temporaries Caller
#define RD_RMM 0b100
// In instructions rm field, selects dynamic rounding mode;
#define RD_RM 0b111
#define RD_DYN RD_RM
// load single precision from rs1+imm12 to frd
#define FLW(frd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b010, frd, 0b0000111))
@ -422,7 +423,7 @@ f2831 ft811 FP temporaries Caller
// Convert from signed 32bits to Single
#define FCVTSW(frd, rs1, rm) EMIT(R_type(0b1101000, 0b00000, rs1, rm, frd, 0b1010011))
// Convert from Single to signed 32bits (rounded according to rm)
#define FCVTWS(rd, frs1, tm) EMIT(R_type(0b1100000, 0b00000, frs1, rm, rd, 0b1010011))
#define FCVTWS(rd, frs1, rm) EMIT(R_type(0b1100000, 0b00000, frs1, rm, rd, 0b1010011))
#define FADDS(frd, frs1, frs2) EMIT(R_type(0b0000000, frs2, frs1, 0b000, frd, 0b1010011))
#define FSUBS(frd, frs1, frs2) EMIT(R_type(0b0000100, frs2, frs1, 0b000, frd, 0b1010011))

View File

@ -1315,7 +1315,7 @@ const char* rv64_print(uint32_t data, uintptr_t addr)
insn.name = "fcvt.d.lu";
break;
}
PRINT_xd_fs1();
PRINT_fd_xs1();
}
case 0x70: {
assert(RS2(data) == 0);