[RV64_DYNAREC] Added more opcodes and some fixes (#710)

* Added 64 88/89/C7 opcodes for test11

* Added more DF opcodes

* Fixed various bugs in x87/SSE/mmx infrastructure

* Added F2 0F 2D CVTSD2SI opcode and remove a TODO in CVTTSD2SI

* Fixed Invalid Operation handling in DF opcodes (interpreter also)

* Added 32-bit -> 16-bit overflow test on DF opcodes
This commit is contained in:
Yang Liu 2023-04-18 21:01:17 +08:00 committed by GitHub
parent c61d341844
commit 008ef41261
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 181 additions and 22 deletions

View File

@ -73,6 +73,63 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
GETEDO(x4, 0, x5);
emit_sub32(dyn, ninst, rex, gd, ed, x3, x4, x5);
break;
case 0x88:
    // MOV Eb, Gb with a segment override: copies one byte from the Gb register
    // into either another byte register (MODREG) or memory at x4 + effective address.
    INST_NAME("MOV Seg:Eb, Gb");
    // x4 is filled here and added to the effective address in the memory form below
    // (presumably the segment base -- confirm against grab_segdata).
    grab_segdata(dyn, addr, ninst, x4, seg);
    nextop=F8;
    // Source byte register: ModRM.reg extended by REX.R.
    gd = ((nextop&0x38)>>3)+(rex.r<<3);
    if(rex.rex) {
        // With a REX prefix the register index is used as-is, never as a high byte.
        gb2 = 0;
        gb1 = xRAX + gd;
    } else {
        gb2 = ((gd&4)>>2);      // L or H
        gb1 = xRAX+(gd&3);      // Ax, Cx, Dx or Bx
    }
    gd = x5;                    // x5 is the scratch that will carry the byte to write
    if(gb2) {
        // High-byte source: bring bits 8..15 down to bits 0..7.
        SRLI(x5, gb1, 8);
        gb1 = x5;
    }
    if(MODREG) {
        // Destination byte register: ModRM.rm extended by REX.B.
        ed = (nextop&7) + (rex.b<<3);
        if(rex.rex) {
            eb1 = xRAX+ed;
            eb2 = 0;
        } else {
            eb1 = xRAX+(ed&3);  // Ax, Cx, Dx or Bx
            eb2 = ((ed&4)>>2);  // L or H
        }
        ANDI(gd, gb1, 0xff);    // isolate the source byte
        if(eb2) {
            // High-byte destination: clear bits 8..15 of eb1, then insert the byte there.
            // The mask lives in register x1, so the register-register AND is required;
            // ANDI would treat x1 as an immediate value and the mask would never be applied.
            MOV64x(x1, 0xffffffffffff00ffLL);
            AND(x1, eb1, x1);
            SLLI(gd, gd, 8);
            OR(eb1, x1, gd);
        } else {
            // Low-byte destination: clear bits 0..7 of eb1 and insert the byte.
            ANDI(x1, eb1, ~0xff);
            OR(eb1, x1, gd);
        }
    } else {
        addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0);
        ADD(x4, ed, x4);        // final address = effective address + x4 from grab_segdata
        SB(gb1, x4, 0);
        SMWRITE2();
    }
    break;
case 0x89:
    // MOV Ed, Gd with a segment override: writes Gd to a register (MODREG),
    // or to memory at x4 + effective address.
    INST_NAME("MOV Seg:Ed, Gd");
    // x4 is filled here and added to the effective address in the memory form below
    // (presumably the segment base -- confirm against grab_segdata).
    grab_segdata(dyn, addr, ninst, x4, seg);
    nextop=F8;
    GETGD;
    if(MODREG) {   // reg <= reg
        // Destination register is ModRM.rm extended by REX.B; x4 is unused on this path.
        MVxw(xRAX+(nextop&7)+(rex.b<<3), gd);
    } else {       // mem <= reg
        addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
        ADD(x4, ed, x4);
        // Store through fixedaddress rather than a hard-coded 0, so that any offset
        // geted chose to leave out of `ed` is still applied to the access.
        // NOTE(review): assumes geted always sets fixedaddress (0 when unused) -- confirm.
        SDxw(gd, x4, fixedaddress);
        SMWRITE2();
    }
    break;
case 0x8B:
INST_NAME("MOV Gd, Seg:Ed");
grab_segdata(dyn, addr, ninst, x4, seg);
@ -87,6 +144,23 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
LDxw(gd, x4, 0);
}
break;
case 0xC7:
// MOV Ed, Id with a segment override: writes a sign-extended 32-bit immediate
// to a register (MODREG), or to memory at x4 + effective address.
INST_NAME("MOV Seg:Ed, Id");
// x4 is filled here and added to the effective address in the memory form below
// (presumably the segment base -- confirm against grab_segdata).
grab_segdata(dyn, addr, ninst, x4, seg);
nextop=F8;
if(MODREG) { // reg <= i32
i64 = F32S;
// destination register is ModRM.rm extended by REX.B
ed = xRAX+(nextop&7)+(rex.b<<3);
MOV64xw(ed, i64);
} else { // mem <= i32
// The immediate is fetched only after geted has consumed the addressing bytes.
// NOTE(review): the trailing 4 presumably tells geted that 4 immediate bytes follow -- confirm.
addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 4);
i64 = F32S;
// x3 holds the immediate while x4 becomes the final store address
MOV64xw(x3, i64);
ADD(x4, ed, x4);
SDxw(x3, x4, 0);
SMWRITE2();
}
break;
default:
DEFAULT;
}

View File

@ -111,10 +111,56 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
FCVTDL(v1, x1, RD_RNE);
}
break;
case 1:
// FISTTP Ew: convert ST0 to an integer using round-toward-zero, store it as a
// 16-bit value at the effective address, then pop the x87 stack.
INST_NAME("FISTTP Ew, ST0");
v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 0, 0);
if(!box64_dynarec_fastround) {
FSFLAGSI(xZR); // reset all FP exception flags so only this conversion can set them
}
// Rounding mode is passed directly (RD_RTZ); no setround/restoreround pair is used on this path.
FCVTWD(x4, v1, RD_RTZ);
if(!box64_dynarec_fastround) {
FRFLAGS(x5); // read back the FP exception flags to test the invalid-operation (NV) bit
ANDI(x5, x5, 1<<FR_NV);
// NV set by the conversion: go write the 0x8000 marker (presumably jumps to MARK)
BNEZ_MARK(x5);
// 32 -> 16 bits overflow test: sign-extend the low 16 bits and compare with the full result
SLLIW(x5, x4, 16);
SRAIW(x5, x5, 16);
// equal means the value fits in 16 bits, so it is kept (presumably jumps to MARK2)
BEQ_MARK2(x5, x4);
MARK;
MOV32w(x4, 0x8000);
}
MARK2;
SH(x4, wback, fixedaddress);
x87_do_pop(dyn, ninst, x3);
break;
case 3:
// FISTP Ew: convert ST0 to an integer using the currently configured rounding mode,
// store it as a 16-bit value at the effective address, then pop the x87 stack.
INST_NAME("FISTP Ew, ST0");
v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 0, 0);
// u8 receives the register that holds the previous rounding mode, for the restore below.
// NOTE(review): this is an x87 opcode but the rounding mode comes from sse_setround
// (declared as "according to mxcsr flags") rather than x87_setround ("according to cw flags") -- confirm intended.
// NOTE(review): x3 is handed over as a scratch register here and was also the hint register
// given to geted for wback -- confirm wback can never be x3 at this point.
u8 = sse_setround(dyn, ninst, x2, x3);
if(!box64_dynarec_fastround) {
FSFLAGSI(xZR); // reset all FP exception flags so only this conversion can set them
}
// RD_RM is passed as the rounding argument, after the rounding mode was swapped in above
FCVTWD(x4, v1, RD_RM);
x87_restoreround(dyn, ninst, u8);
if(!box64_dynarec_fastround) {
FRFLAGS(x5); // read back the FP exception flags to test the invalid-operation (NV) bit
ANDI(x5, x5, 1<<FR_NV);
// NV set by the conversion: go write the 0x8000 marker (presumably jumps to MARK)
BNEZ_MARK(x5);
// 32 -> 16 bits overflow test: sign-extend the low 16 bits and compare with the full result
SLLIW(x5, x4, 16);
SRAIW(x5, x5, 16);
// equal means the value fits in 16 bits, so it is kept (presumably jumps to MARK2)
BEQ_MARK2(x5, x4);
MARK;
MOV32w(x4, 0x8000);
}
MARK2;
SH(x4, wback, fixedaddress);
x87_do_pop(dyn, ninst, x3);
break;
default:
DEFAULT;
break;
}
}
return addr;
}
}

View File

@ -104,8 +104,42 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
nextop = F8;
GETGD;
GETEXSD(v0, 0);
// TODO: fastnan handling
if(!box64_dynarec_fastround) {
FSFLAGSI(xZR); // // reset all bits
}
FCVTLDxw(gd, v0, RD_RTZ);
if(!box64_dynarec_fastround) {
FRFLAGS(x5); // get back FPSR to check the IOC bit
ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF));
CBZ_NEXT(x5);
if(rex.w) {
MOV64x(gd, 0x8000000000000000LL);
} else {
MOV32w(gd, 0x80000000);
}
}
break;
case 0x2D:
// CVTSD2SI Gd, Ex: convert the scalar double in Ex to a 32- or 64-bit integer in Gd
// (width selected by rex.w) using the rounding mode installed by sse_setround.
INST_NAME("CVTSD2SI Gd, Ex");
nextop = F8;
GETGD;
GETEXSD(v0, 0);
if(!box64_dynarec_fastround) {
FSFLAGSI(xZR); // reset all FP exception flags so only this conversion can set them
}
// u8 receives the register that holds the previous rounding mode, for the restore below
u8 = sse_setround(dyn, ninst, x2, x3);
FCVTLDxw(gd, v0, RD_RM);
x87_restoreround(dyn, ninst, u8);
if(!box64_dynarec_fastround) {
FRFLAGS(x5); // read back the FP exception flags to test the NV and OF bits
ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF));
// neither flag set: the converted value stands (presumably skips to the next instruction)
CBZ_NEXT(x5);
// invalid or overflowing conversion: replace the result with the minimum signed value of the operand width
if(rex.w) {
MOV64x(gd, 0x8000000000000000LL);
} else {
MOV32w(gd, 0x80000000);
}
}
break;
case 0x38: // these are some more SSSE4.2+ opcodes
opcode = F8;

View File

@ -984,7 +984,7 @@ void x87_swapreg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int a, int b)
}
// Set rounding according to cw flags, return reg to restore flags
int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2)
{
MAYUSE(dyn); MAYUSE(ninst);
MAYUSE(s1); MAYUSE(s2);
@ -994,18 +994,19 @@ int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
// MMX/x87 Round mode: 0..3: Nearest, Down, Up, Chop
// RV64: 0..7: Nearest, Toward Zero (Chop), Down, Up, Nearest tie to Max, invalid, invalid, dynamic (invalid here)
// 0->0, 1->2, 2->3, 3->1
SLLI(s1, s1, 1);
BEQ(s1, xZR, 24);
ADDI(s2, xZR, 3);
BGE(s1, s2, 4+8);
SUBI(s1, s1, 4);
XORI(s3, s1, 0b11);
BEQ(s1, s2, 12);
ADDI(s1, s1, 1);
BEQ(xZR, xZR, 8);
ADDI(s1, xZR, 1);
// transform done (is there a faster way?)
FSRM(s3); // exange RM with current
return s3;
FSRM(s1, s1); // exange RM with current
return s1;
}
// Set rounding according to mxcsr flags, return reg to restore flags
int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2)
{
MAYUSE(dyn); MAYUSE(ninst);
MAYUSE(s1); MAYUSE(s2);
@ -1015,14 +1016,15 @@ int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
// MMX/x87 Round mode: 0..3: Nearest, Down, Up, Chop
// RV64: 0..7: Nearest, Toward Zero (Chop), Down, Up, Nearest tie to Max, invalid, invalid, dynamic (invalid here)
// 0->0, 1->2, 2->3, 3->1
SLLI(s1, s1, 1);
BEQ(s1, xZR, 24);
ADDI(s2, xZR, 3);
BGE(s1, s2, 4+8);
SUBI(s1, s1, 4);
XORI(s3, s1, 0b11);
BEQ(s1, s2, 12);
ADDI(s1, s1, 1);
BEQ(xZR, xZR, 8);
ADDI(s1, xZR, 1);
// transform done (is there a faster way?)
FSRM(s3); // exange RM with current
return s3;
FSRM(s1, s1); // exange RM with current
return s1;
}
// Restore round flag, destroy s1 doing so
@ -1030,7 +1032,7 @@ void x87_restoreround(dynarec_rv64_t* dyn, int ninst, int s1)
{
MAYUSE(dyn); MAYUSE(ninst);
MAYUSE(s1);
FSRM(s1); // put back fpscr
FSRM(s1, s1); // put back fpscr
}
// MMX helpers

View File

@ -1004,11 +1004,11 @@ void x87_reget_st(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st);
// swap 2 x87 regs
void x87_swapreg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int a, int b);
// Set rounding according to cw flags, return reg to restore flags
int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3);
int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2);
// Restore round flag
void x87_restoreround(dynarec_rv64_t* dyn, int ninst, int s1);
// Set rounding according to mxcsr flags, return reg to restore flags
int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3);
int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2);
void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3);

View File

@ -369,8 +369,8 @@ f2831 ft811 FP temporaries Caller
// RV32F
// Read round mode
#define FRRM(rd) CSRRS(rd, xZR, 0x002)
// Swap round mode with rd
#define FSRM(rd) CSRRWI(rd, 0b111, 0x002)
// Swap round mode
#define FSRM(rd, rs) CSRRW(rd, rs, 0x002)
// Write FP exception flags, immediate
#define FSFLAGSI(imm) CSRRWI(xZR, imm, 0x0001)
// Read FP exception flags to rd

View File

@ -131,7 +131,10 @@ uintptr_t RunDF(x64emu_t *emu, rex_t rex, uintptr_t addr)
case 1: /* FISTTP Ew, ST0 */
GETEW(0);
tmp16s = ST0.d;
EW->sword[0] = tmp16s;
if(isgreater(ST0.d, (double)(int32_t)0x7fff) || isless(ST0.d, -(double)(int32_t)0x8000) || !isfinite(ST0.d))
EW->sword[0] = 0x8000;
else
EW->sword[0] = tmp16s;
fpu_do_pop(emu);
break;
case 2: /* FIST Ew, ST0 */