(Creeper, VU): Adds some multiplication instructions to the VU

This commit is contained in:
Gabriel Correia 2024-06-17 14:22:48 -03:00
parent ea77d1f765
commit a05cebb499
10 changed files with 179 additions and 17 deletions

View File

@ -143,6 +143,7 @@ namespace cosmic::creeper {
static void ori(Operands ops);
static void xori(Operands ops);
static void jr(Operands ops);
static void jalr(Operands ops);
static void bne(Operands ops);

View File

@ -16,6 +16,7 @@ namespace cosmic::creeper {
{SpecialSrlv, {&srlv, "SRLV"}},
{SpecialSrav, {&srav, "SRAV"}},
{SpecialJr, {&jr, "JR"}},
{SpecialJalr, {&jalr, "JALR $RD, $RS"}},
{SpecialMovZ, {&movz, "MOVZ"}},
{SpecialMovN, {&movn, "MOVN"}},
{SpecialSyscall, {&iSyscall, "SYSCALL"}},

View File

@ -164,6 +164,17 @@ namespace cosmic::creeper {
cpu->delaySlot = {};
cpu->GPRs[ee::$v0].qw = {1};
}
// JALR $rd, $rs: stores the return address in rd and jumps to the address
// held in rs.
void MipsIvInterpreter::jalr(Operands ops) {
    // Read the jump target before rd is written, since rd may be the same
    // register as rs
    const auto jumpAddr{cpu->GPRs[ops.rs].words[0]};
    // 8 because of the delay slot: execution resumes after the instruction
    // that follows the jump
    const u32 retAddr{cpu->eePc + 8};
    cpu->GPRs[ops.rd].words[0] = retAddr;

    // The branch goes to the register target, not to the return address
    cpu->chPc(jumpAddr);
    cpu->delaySlot = 1;
}
void MipsIvInterpreter::addi(Operands ops) {
doReg(ops.rt) = getOffset(ops) + doReg(ops.rs);
}

View File

@ -46,6 +46,16 @@ namespace cosmic::creeper {
static void iddai(VuMicroOperands& ops);
static void mtir(VuMicroOperands& ops);
static void mr32(VuMicroOperands& ops);
static void mul(VuMicroOperands& ops);
static void mula(VuMicroOperands& ops);
static void mulabc(VuMicroOperands& ops);
static void mulai(VuMicroOperands& ops);
static void mulaq(VuMicroOperands& ops);
static std::function<void(VuMicroOperands& ops,
std::function<void(u32 xYzW)>)> vectorizeXyZw;
static f32 fasterPs2VuMul(Reg rad, u32 id, Reg mul, u32 idx);
private:
VuMicroOrder ordered;

View File

@ -3,7 +3,7 @@
namespace cosmic::creeper {
void VuMicroInterpreter::mr32(VuMicroOperands& ops) {
static const std::array<u8, 4> rotates{8, 4, 2, 1};
f32 fsX{vu->VuGPRs[ops.fs].floats[0]};
f32 fsX{vu->vuGPRs[ops.fs].floats[0]};
const i32 x{ops.fd & rotates[0]};
const i32 y{ops.fd & rotates[1]};
@ -11,13 +11,79 @@ namespace cosmic::creeper {
const i32 w{ops.fd & rotates[3]};
if (x)
vu->VuGPRs[ops.ft].floats[0] = vu->VuGPRs[ops.fs].floats[1];
vu->vuGPRs[ops.ft].floats[0] = vu->vuGPRs[ops.fs].floats[1];
if (y)
vu->VuGPRs[ops.ft].floats[1] = vu->VuGPRs[ops.fs].floats[2];
vu->vuGPRs[ops.ft].floats[1] = vu->vuGPRs[ops.fs].floats[2];
if (z)
vu->VuGPRs[ops.ft].floats[2] = vu->VuGPRs[ops.fs].floats[3];
vu->vuGPRs[ops.ft].floats[2] = vu->vuGPRs[ops.fs].floats[3];
if (w)
vu->VuGPRs[ops.ft].floats[3] = fsX;
vu->vuGPRs[ops.ft].floats[3] = fsX;
}
// Multiplies one lane of two VU operands after passing both through
// toSony754.
//   rad - register that supplies the first factor when `id` selects a lane
//   id  - first factor selector: 100..103 take lane (id - 100) of `rad`,
//         104 takes the I register, 105 takes the Q register, any other
//         value takes lane `idx` of `rad`
//   mul - register that supplies the second factor (always lane `idx`)
//   idx - lane being computed
// Returns the product of the two normalized factors.
f32 VuMicroInterpreter::fasterPs2VuMul(Reg rad, u32 id, Reg mul, u32 idx) {
    f32 vmuli{};
    switch (id) {
    case 100:
    case 101:
    case 102:
    case 103:
        vmuli = vu->vuGPRs[rad].floats[id - 100];
        break;
    case 104:
        vmuli = vu->getSpecialReg(vu::I).hd;
        break;
    case 105:
        vmuli = vu->getSpecialReg(vu::Q).hd;
        break;
    default:
        vmuli = vu->vuGPRs[rad].floats[idx];
    }
    const f32 vbase{vu->vuGPRs[mul].floats[idx]};

    // toSony754 masks the exponent field of its argument, so it has to be
    // given the raw bit pattern of the float. A static_cast would convert
    // the numeric value to an integer instead.
    // __builtin_bit_cast is a compiler builtin and needs no extra header.
    //
    // https://fobes.dev/ps2/detecting-emu-vu-floats
    // NOTE(review): the article above discusses differences in the hardware
    // multiplier; they are not modeled here, the plain product is returned
    // (so 1.0 * 1.0 yields 1.0 and no longer 0.0) - confirm the intent.
    return vu->toSony754(__builtin_bit_cast(u32, vmuli)) *
        vu->toSony754(__builtin_bit_cast(u32, vbase));
}
// Runs `callback` once for every lane (0 = X ... 3 = W) whose bit is set in
// ops.field; bit (3 - lane) of the field selects the lane.
// Lanes that are not selected only get their MAC flags cleared.
std::function<void(VuMicroOperands&,
    std::function<void(u32)>)> VuMicroInterpreter::vectorizeXyZw{
    [](VuMicroOperands& ops, std::function<void(u32 xYzW)> callback) {
        for (u32 xYzW{}; xYzW < 4; xYzW++) {
            const bool selected{(ops.field & (1 << (3 - xYzW))) != 0};
            if (!selected) {
                // The lane is masked out: no result is written, only its
                // flags are reset
                vu->clsMacFlags(xYzW);
                continue;
            }
            callback(xYzW);
        }
    }};
// These instructions are applicable to both VU0 and VU1
// MUL: for every lane handed out by vectorizeXyZw, writes
// FS[lane] * FT[lane] (after the MAC flags update) into FD[lane].
void VuMicroInterpreter::mul(VuMicroOperands& ops) {
    const auto writeLane = [&](u32 lane) {
        const f32 product{fasterPs2VuMul(ops.ft, 0, ops.fs, lane)};
        vu->vuGPRs[ops.fd].floats[lane] = vu->modifierMacFlags(product, lane);
    };
    vectorizeXyZw(ops, writeLane);
}
// MULA: same product as MUL (FS[lane] * FT[lane]), but the result is
// stored in the accumulator lane and not in a VF register.
void VuMicroInterpreter::mula(VuMicroOperands& ops) {
    const auto writeLane = [&](u32 lane) {
        const f32 product{fasterPs2VuMul(ops.ft, 0, ops.fs, lane)};
        vu->acc.floats[lane] = vu->modifierMacFlags(product, lane);
    };
    vectorizeXyZw(ops, writeLane);
}
// MULAbc: multiplies every handed-out lane of FS by the single FT lane
// picked by ops.bc (selector 100 + bc) and stores it in the accumulator.
void VuMicroInterpreter::mulabc(VuMicroOperands& ops) {
    const auto writeLane = [&](u32 lane) {
        const f32 product{
            fasterPs2VuMul(ops.ft, 100 + ops.bc, ops.fs, lane)};
        vu->acc.floats[lane] = vu->modifierMacFlags(product, lane);
    };
    vectorizeXyZw(ops, writeLane);
}
// MULAi: multiplies every handed-out lane of FS by the I register
// (selector 104) and stores the result in the accumulator.
void VuMicroInterpreter::mulai(VuMicroOperands& ops) {
    const auto writeLane = [&](u32 lane) {
        const f32 product{fasterPs2VuMul(ops.ft, 104, ops.fs, lane)};
        vu->acc.floats[lane] = vu->modifierMacFlags(product, lane);
    };
    vectorizeXyZw(ops, writeLane);
}
// MULAq: multiplies every handed-out lane of FS by the Q register
// (selector 105) and stores the result in the accumulator.
void VuMicroInterpreter::mulaq(VuMicroOperands& ops) {
    const auto writeLane = [&](u32 lane) {
        const f32 product{fasterPs2VuMul(ops.ft, 105, ops.fs, lane)};
        vu->acc.floats[lane] = vu->modifierMacFlags(product, lane);
    };
    vectorizeXyZw(ops, writeLane);
}
}

View File

@ -4,10 +4,10 @@
namespace cosmic::creeper {
// Integer add with immediate: intsRegs[ft] receives intsRegs[fs] plus a
// signed 5-bit immediate taken from bits 6..10 of the instruction word.
void VuMicroInterpreter::iddai(VuMicroOperands& ops) {
// The register indices are masked to 4 bits before being handed to pushIntPipe
// (presumably this records the destination/source pair for the integer pipeline - TODO confirm)
vu->pushIntPipe(ops.ft & 0xf, ops.fs & 0xf);
// Extract the raw 5-bit immediate field
i16 imm{static_cast<i16>((ops.inst >> 6) & 0x1f)};
auto imm{static_cast<i16>((ops.inst >> 6) & 0x1f)};
// Sign-extend: when bit 4 is set the upper 12 bits are filled with ones
imm = static_cast<i16>((imm & 0x10 ? 0xfff0 : 0) | (imm & 0xf));
// Signed view of the source register plus the immediate
vu::VuIntReg vui{vu->intsRegs[ops.fs].sig + imm};
auto vui{vu->intsRegs[ops.fs].sig + imm};
vu->intsRegs[ops.ft] = vui;
}
void VuMicroInterpreter::mtir(VuMicroOperands& ops) {
@ -15,6 +15,6 @@ namespace cosmic::creeper {
u32 id{(ops.inst >> 21) & 0x3};
if (id > 3) {
}
vu->intsRegs[ops.ft & 0xf].uns = static_cast<u16>(vu->VuGPRs[ops.fs].uns[id]);
vu->intsRegs[ops.ft & 0xf].uns = static_cast<u16>(vu->vuGPRs[ops.fs].uns[id]);
}
}

View File

@ -88,6 +88,7 @@ namespace cosmic::ee {
}
// Redirects execution to neoPC: flags the change as a branch and keeps the
// program counter that is being left in lastPc.
inline void chPc(u32 neoPC) {
    isABranch = true;

    lastPc = eePc;
    eePc = neoPC;
}
i64 getHtzCycles(bool total) const;

View File

@ -39,6 +39,7 @@ namespace cosmic::ee {
SpecialSrav,
// Note: This is our first flavor of JUMP added (Jump Register)
SpecialJr,
SpecialJalr,
SpecialMovZ = 0xa,
SpecialMovN,
SpecialSyscall,

View File

@ -37,7 +37,7 @@ namespace cosmic::vu {
exe = std::make_unique<creeper::VuMicroInterpreter>(*this);
// vf00 is hardwired to the vector {0.0, 0.0, 0.0, 1.0}
VuGPRs[0].w = 1.0;
vuGPRs[0].w = 1.0;
intsRegs[0].uns = 0;
status.isVuExecuting = false;
clock.isDirty = false;
@ -140,16 +140,17 @@ namespace cosmic::vu {
spQ.hd = 0.f;
spR.hd = 0.f;
spP.hd = 0.f;
acc.faster = {};
for (u8 gpr{0}; gpr < 32; gpr++) {
if (gpr < 16)
intsRegs[gpr].uns = 0;
if (!gpr)
continue;
VuGPRs[gpr].floats[0] = 0;
VuGPRs[gpr].floats[1] = 0;
VuGPRs[gpr].floats[2] = 0;
VuGPRs[gpr].floats[3] = 0;
vuGPRs[gpr].floats[0] = 0;
vuGPRs[gpr].floats[1] = 0;
vuGPRs[gpr].floats[2] = 0;
vuGPRs[gpr].floats[3] = 0;
}
}
@ -273,4 +274,43 @@ namespace cosmic::vu {
vuPc += 4;
return opcode;
}
// Updates the pending MAC flags (nextFlagsPipe) of one lane from a result
// value and returns the value the lane should hold.
//   val   - result computed for the lane
//   index - lane number; the flag bits of the lane live at bit (3 - index)
//           of each 4-bit group
// Flag groups as used below: 0x0001 zero, 0x0010 sign, 0x0100 underflow,
// 0x1000 overflow (each shifted by the lane's flag id).
// Returns `val` unchanged for zero and regular values, a signed zero on
// underflow and the signed maximum (0x7f7fffff) on overflow.
f32 VectorUnit::modifierMacFlags(const f32 val, u32 index) {
    // The checks below work on the IEEE-754 bit pattern, so the float must
    // be reinterpreted and not value-converted
    // (__builtin_bit_cast is a compiler builtin and needs no extra header)
    auto treat{__builtin_bit_cast(u32, val)};
    const auto flagId{3 - index};

    // Test the sign bit of the float, we need to set or clear the sign flag
    if (treat & 0x8000'0000)
        nextFlagsPipe |= 0x10 << flagId;
    else // Cleaning
        nextFlagsPipe &= ~(0x10 << flagId);

    // Zero, Clear Underflow and Overflow
    if ((treat & 0x7fff'ffff) == 0) {
        nextFlagsPipe |= 1 << flagId;
        nextFlagsPipe &= ~(0x1100 << flagId);
        return val;
    }

    // https://github.com/PSI-Rockin/DobieStation/blob/master/src/core/ee/vu.cpp#L1023
    switch ((treat >> 23) & 0xff) {
    case 0: // Underflow, set zero
        nextFlagsPipe |= 0x101 << flagId;
        // Only the overflow flag of this lane is cleared; the other lanes
        // must keep their flags
        nextFlagsPipe &= ~(0x1000 << flagId);
        treat &= 0x8000'0000;
        break;
    case 255: // Overflow
        nextFlagsPipe |= 0x1000 << flagId;
        nextFlagsPipe &= ~(0x101 << flagId);
        treat &= 0x8000'0000;
        treat |= 0x7f7f'ffff;
        break;
    default: // Clear all but sign
        nextFlagsPipe &= ~(0x1101 << flagId);
    }
    return __builtin_bit_cast(f32, treat);
}
// Clears all four pending MAC flags (one bit in each 4-bit group of
// nextFlagsPipe) that belong to the lane `index`.
void VectorUnit::clsMacFlags(u32 index) {
    const auto laneBits{0x1111 << (3 - index)};
    nextFlagsPipe &= ~laneBits;
}
}

View File

@ -99,7 +99,8 @@ namespace cosmic::vu {
void startXgKick2Gif();
void handleDataTransfer();
alignas(512) std::array<VuReg, 32> VuGPRs;
alignas(512) std::array<VuReg, 32> vuGPRs;
VuReg acc;
alignas(32) std::array<VuIntReg, 16> intsRegs;
void establishVif(u16 conTops[2], Ref<gs::GifBridge> gif);
@ -118,7 +119,7 @@ namespace cosmic::vu {
u32 fetchByPc();
VuWorkMemory vecRegion;
[[clang::always_inline]] void setSpecialReg(VuSpecialReg reg, const u32 uns) {
void setSpecialReg(VuSpecialReg reg, const u32 uns) {
if (reg == VuSpecialReg::I)
spI.uns = uns;
else if (reg == VuSpecialReg::Q)
@ -128,13 +129,38 @@ namespace cosmic::vu {
else if (reg == VuSpecialReg::P)
spP.uns = uns;
}
// Returns a copy of the special register selected by `reg` (I, Q, R or P).
// A value that matches none of the enumerators yields a value-initialized
// register, so control never flows off the end of the function.
auto getSpecialReg(VuSpecialReg reg) const {
    switch (reg) {
    case I:
        return spI;
    case Q:
        return spQ;
    case R:
        return spR;
    case P:
        return spP;
    }
    // Only reachable with an out-of-range selector
    return VuRegUnique{};
}
private:
VuRegUnique spI, spQ, spR, spP;
// Normalizes a float, given as its raw IEEE-754 bit pattern:
//   - exponent field 0    -> signed zero
//   - exponent field 0xff -> signed maximum finite value (0x7f7fffff)
//   - anything else       -> unchanged
// Returns the resulting bit pattern reinterpreted as f32.
[[clang::always_inline]] f32 toSony754(const u32 value) {
    u32 bits{value};
    switch (value & 0x7f800000) {
    case 0:
        bits = value & 0x80000000;
        break;
    case 0x7f800000:
        bits = (value & 0x80000000) | 0x7f7fffff;
        break;
    default:
        break;
    }
    // The bits have to be reinterpreted: a static_cast would turn the
    // integer's numeric value into a float
    // (__builtin_bit_cast is a compiler builtin and needs no extra header)
    return __builtin_bit_cast(f32, bits);
}
f32 modifierMacFlags(const f32 val, u32 index);
void clsMacFlags(u32 index);
VuRegUnique
cachedQ,
cachedP;
private:
VuRegUnique spI, spQ, spR, spP;
std::shared_ptr<ee::EeMipsCore> ee;
void updateMacPipeline();
void updateDeltaCycles(i64 add, bool incCount = false);
@ -147,7 +173,12 @@ namespace cosmic::vu {
std::array<u8, 4> clipFlags;
u8 cfIndex;
// The flags hole pipeline
std::array<u16, 4> macFlags;
// This variable carries the flags modified during the VU operations
// This value will be placed in the VU pipeline as soon as possible
// 0(1111) X, 4(1111) Y, 8(1111) Z, 12(1111) W
// modifierMacFlags, clsMacFlags
u16 nextFlagsPipe;
u8 mfIndex;
u16 gifAddr;