[RV64_DYNAREC] Added more SSE opcodes for Stardew Valley (#672)

* [RV64_DYNAREC] Fixed 66 0F 73 /7 PSLLDQ opcode

* [RV64_DYNAREC] Added 66 0F 6D PUNPCKHQDQ opcode

* [RV64_DYNAREC] Added F2 0F 2C CVTTSD2SI opcode

* [RV64_DYNAREC] Added 66 0F 7F MOVDQA opcode

* [RV64_DYNAREC] Added 0F C6 SHUFPS opcode

* [RV64_DYNAREC] Added 66 0F 72 /6 PSLLD opcode

* [RV64_DYNAREC] Added 66 0F 74 PCMPEQB opcode

* [RV64_DYNAREC] Added 66 0F FA PSUBD opcode

* [RV64_DYNAREC] Added F2 0F 5D MINSD opcode
This commit is contained in:
Yang Liu 2023-04-07 17:34:05 +08:00 committed by GitHub
parent 14695faa43
commit 149414f233
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 107 additions and 1 deletions

View File

@ -564,7 +564,28 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
if(!rex.w)
ZEROUP(gd);
break;
case 0xC6: // TODO: Optimize this!
// SHUFPS Gx, Ex, Ib: rebuild Gx from four 32-bit lanes selected by the
// four 2-bit fields of the immediate. Result lanes 0-1 are read from Gx
// (gback), result lanes 2-3 are read from Ex (wback+fixedaddress).
INST_NAME("SHUFPS Gx, Ex, Ib");
nextop = F8;
GETGX(x1);
// NOTE(review): the second GETEX argument is presumably the number of
// immediate bytes that follow the ModRM operand (one here, read just below) - confirm.
GETEX(x2, 1);
u8 = F8;
int32_t idx;
// bits 1:0 of Ib -> source lane in Gx for result lane 0
idx = (u8>>(0*2))&3;
LWU(x3, gback, idx*4);
// bits 3:2 of Ib -> source lane in Gx for result lane 1
idx = (u8>>(1*2))&3;
LWU(x4, gback, idx*4);
// bits 5:4 of Ib -> source lane in Ex for result lane 2
idx = (u8>>(2*2))&3;
LWU(x5, wback, fixedaddress+idx*4);
// bits 7:6 of Ib -> source lane in Ex for result lane 3
idx = (u8>>(3*2))&3;
LWU(x6, wback, fixedaddress+idx*4);
// All four loads are emitted before the first store, so no lane of Gx is
// overwritten while it may still be needed as a source.
SW(x3, gback, 0*4);
SW(x4, gback, 1*4);
SW(x5, gback, 2*4);
SW(x6, gback, 3*4);
break;
default:
DEFAULT;
}

View File

@ -176,6 +176,16 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
SD(x3, gback, 8);
}
break;
case 0x6D:
// PUNPCKHQDQ Gx, Ex: Gx.low64 = Gx.high64, then Gx.high64 = Ex.high64.
INST_NAME("PUNPCKHQDQ Gx,Ex");
nextop = F8;
GETGX(x1);
GETEX(x2, 0);
// move the high quadword of Gx (offset 8) down to the low quadword (offset 0)
LD(x3, gback, 8);
SD(x3, gback, 0);
// copy the high quadword of Ex into the high quadword of Gx; the high half
// of Gx has not been modified yet, so this also holds when Ex is Gx itself
LD(x3, wback, fixedaddress+8);
SD(x3, gback, 8);
break;
case 0x6E:
INST_NAME("MOVD Gx, Ed");
nextop = F8;
@ -206,7 +216,6 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
GETGX(x1);
GETEX(x2, 1);
u8 = F8;
i32 = -1;
int32_t idx;
idx = (u8>>(0*2))&3;
@ -240,6 +249,20 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
}
}
break;
case 6:
// PSLLD Ex, Ib: shift each 32-bit lane of Ex left by the immediate count.
// A count above 31 clears the whole 128-bit operand; a count of 0 emits nothing.
INST_NAME("PSLLD Ex, Ib");
GETEX(x1, 1);
u8 = F8;
if(u8) {
if (u8>31) {
// just zero dest
// Address Ex through wback like every other opcode does: x1 is only
// the scratch register handed to GETEX and is not guaranteed to hold
// the operand's base address.
SD(xZR, wback, fixedaddress+0);
SD(xZR, wback, fixedaddress+8);
} else {
// 1 <= u8 <= 31 here (u8 != 0 was already checked above)
SSE_LOOP_DS(x3, SLLI(x3, x3, u8));
}
}
break;
default:
DEFAULT;
}
@ -302,10 +325,25 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
SD(xZR, x1, fixedaddress+0);
}
}
break;
default:
DEFAULT;
}
break;
case 0x74:
// PCMPEQB Gx, Ex: byte-wise equality compare; each of the 16 bytes of Gx
// becomes 0xFF where it equals the matching byte of Ex, 0x00 otherwise.
INST_NAME("PCMPEQB Gx,Ex");
nextop = F8;
GETGX(x1);
GETEX(x2, 0);
// The C loop is unrolled at translation time: one 6-instruction sequence
// is emitted per byte, with i used as a constant offset.
for (int i=0; i<16; ++i) {
LBU(x3, gback, i);
LBU(x4, wback, fixedaddress+i);
SUB(x3, x3, x4);    // zero iff the two bytes are equal
SEQZ(x3, x3);       // 1 if equal, 0 otherwise
NEG(x3, x3);        // -1 (all ones) if equal, 0 otherwise
SB(x3, gback, i);   // only the low byte is stored: 0xFF or 0x00
}
break;
case 0x76:
INST_NAME("PCMPEQD Gx,Ex");
nextop = F8;
@ -339,6 +377,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
}
}
break;
case 0x7F:
// MOVDQA Ex, Gx: store form, Gx is the source and Ex the destination.
INST_NAME("MOVDQA Ex,Gx");
nextop = F8;
GETGX(x1);
GETEX(x2, 0);
// NOTE(review): SSE_LOOP_MV_Q2 is defined elsewhere; presumably it copies
// both 64-bit halves from gback to wback+fixedaddress using x3 as scratch - confirm.
SSE_LOOP_MV_Q2(x3);
// Only when Ex is a memory operand (not a register) is SMWRITE2() invoked.
// NOTE(review): presumably this records the guest memory write for
// strong-memory-model handling - confirm against the helper header.
if(!MODREG) SMWRITE2();
break;
case 0xAF:
INST_NAME("IMUL Gw,Ew");
SETFLAGS(X_ALL, SF_PENDING);
@ -422,6 +468,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
}
break;
case 0xFA:
// PSUBD Gx, Ex: packed 32-bit subtraction, Gx[i] -= Ex[i].
INST_NAME("PSUBD Gx,Ex");
nextop = F8;
GETGX(x1);
GETEX(x2, 0);
// NOTE(review): SSE_LOOP_D is defined elsewhere; presumably it loads each
// 32-bit lane of Gx into x3 and of Ex into x4, runs the given operation and
// stores x3 back into Gx - confirm. SUBW performs the 32-bit subtraction.
SSE_LOOP_D(x3, x4, SUBW(x3, x3, x4));
break;
case 0xFD:
INST_NAME("PADDW Gx,Ex");
nextop = F8;

View File

@ -90,6 +90,14 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
FCVTDW(v0, ed, RD_RNE);
}
break;
case 0x2C:
INST_NAME("CVTTSD2SI Gd, Ex");
nextop = F8;
GETGD;
GETEXSD(v0, 0);
// TODO: fastnan handling
FCVTLDxw(gd, v0, RD_RTZ);
break;
case 0x38: // these are some more SSSE4.2+ opcodes
opcode = F8;
switch(opcode) {
@ -149,6 +157,21 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
GETEXSD(v1, 0);
FSUBD(v0, v0, v1);
break;
case 0x5D:
// MINSD Gx, Ex: scalar double minimum. Gx keeps its value only when both
// operands are non-NaN and Ex is not less than Gx; otherwise Ex is copied in.
INST_NAME("MINSD Gx, Ex");
nextop = F8;
GETGXSD(v0);
GETEXSD(v1, 0);
// A value compares equal to itself only when it is not NaN, so after the
// AND x2 is non-zero only if neither operand is NaN.
FEQD(x2, v0, v0);
FEQD(x3, v1, v1);
AND(x2, x2, x3);
// At least one NaN: jump to the copy at MARK so the result is Ex
// (NOTE(review): BEQ_MARK/BEQ_MARK2 presumably branch to MARK/MARK2 - confirm).
BEQ_MARK(x2, xZR);
// x2 = (Ex < Gx); when false, skip the copy and keep Gx
FLTD(x2, v1, v0);
BEQ_MARK2(x2, xZR);
MARK;
FMVD(v0, v1);
MARK2;
// NOTE(review): the comparison is strict, so operands that compare equal
// (including +0.0 vs -0.0) keep Gx, whereas x86 MINSD returns the source
// operand in that case - confirm whether a less-or-equal compare is needed.
break;
case 0x5E:
INST_NAME("DIVSD Gx, Ex");
nextop = F8;

View File

@ -456,6 +456,10 @@ f2831 ft811 FP temporaries Caller
#define FCVTSD(frd, frs1) EMIT(R_type(0b0100000, 0b00001, frs1, 0b000, frd, 0b1010011))
// Convert Single frs1 to Double frd
#define FCVTDS(frd, frs1) EMIT(R_type(0b0100001, 0b00000, frs1, 0b000, frd, 0b1010011))
// Convert from Double to signed 32bits (FCVT.W.D); rm is emitted in the rounding-mode field
#define FCVTWD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00000, frs1, rm, rd, 0b1010011))
// Convert from Double to unsigned 32bits (FCVT.WU.D); rm is emitted in the rounding-mode field
#define FCVTWUD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00001, frs1, rm, rd, 0b1010011))
// store rs1 with rs2 sign bit to rd
#define FSGNJD(rd, rs1, rs2) EMIT(R_type(0b0010001, rs2, rs1, 0b000, rd, 0b1010011))
// move rs1 to rd
@ -497,4 +501,9 @@ f2831 ft811 FP temporaries Caller
// Convert from Double to unsigned 64bits
#define FCVTLUD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00011, frs1, rm, rd, 0b1010011))
// Convert from Double to signed integer: 64bits when rex.w is set, 32bits otherwise
// (the rs2 selector becomes 0b00010 or 0b00000). Expands a reference to `rex`,
// so it can only be used where a `rex` variable is in scope.
#define FCVTLDxw(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00000+(rex.w?0b10:0b00), frs1, rm, rd, 0b1010011))
// Convert from Double to unsigned integer: 64bits when rex.w is set, 32bits otherwise
// (the rs2 selector becomes 0b00011 or 0b00001). Also requires `rex` in scope.
#define FCVTLUDxw(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00001+(rex.w?0b10:0b00), frs1, rm, rd, 0b1010011))
#endif //__RV64_EMITTER_H__