mirror of
https://github.com/shadergz/cosmic-station.git
synced 2024-11-27 00:00:21 +00:00
(Creeper, VU)
: Adds some multiplication instructions to the VU
This commit is contained in:
parent
ea77d1f765
commit
a05cebb499
@ -143,6 +143,7 @@ namespace cosmic::creeper {
|
||||
static void ori(Operands ops);
|
||||
static void xori(Operands ops);
|
||||
static void jr(Operands ops);
|
||||
static void jalr(Operands ops);
|
||||
|
||||
static void bne(Operands ops);
|
||||
|
||||
|
@ -16,6 +16,7 @@ namespace cosmic::creeper {
|
||||
{SpecialSrlv, {&srlv, "SRLV"}},
|
||||
{SpecialSrav, {&srav, "SRAV"}},
|
||||
{SpecialJr, {&jr, "JR"}},
|
||||
{SpecialJalr, {&jalr, "JALR $RD, $RS"}},
|
||||
{SpecialMovZ, {&movz, "MOVZ"}},
|
||||
{SpecialMovN, {&movn, "MOVN"}},
|
||||
{SpecialSyscall, {&iSyscall, "SYSCALL"}},
|
||||
|
@ -164,6 +164,17 @@ namespace cosmic::creeper {
|
||||
cpu->delaySlot = {};
|
||||
cpu->GPRs[ee::$v0].qw = {1};
|
||||
}
|
||||
// JALR $RD, $RS: stores the return address in $RD and jumps to the address held in $RS.
void MipsIvInterpreter::jalr(Operands ops) {
    // Read the jump target first: $RD and $RS may name the same register,
    // and the link write below must not clobber the destination address
    const u32 target{static_cast<u32>(cpu->GPRs[ops.rs].words[0])};
    // 8 because of the delay slot: the instruction right after the jump still runs,
    // so the callee returns to the one that follows it
    const u32 retAddr{cpu->eePc + 8};

    cpu->GPRs[ops.rd].words[0] = retAddr;

    // Jump to the register target (previously this jumped to retAddr itself,
    // which made the instruction fall through instead of branching)
    cpu->chPc(target);
    cpu->delaySlot = 1;
}
|
||||
|
||||
// ADDI: writes into $RT the sum of the value returned by getOffset(ops) and $RS.
void MipsIvInterpreter::addi(Operands ops) {
    // The sum is fully computed before the destination register is touched
    const auto sum{getOffset(ops) + doReg(ops.rs)};
    doReg(ops.rt) = sum;
}
|
||||
|
@ -46,6 +46,16 @@ namespace cosmic::creeper {
|
||||
static void iddai(VuMicroOperands& ops);
|
||||
static void mtir(VuMicroOperands& ops);
|
||||
static void mr32(VuMicroOperands& ops);
|
||||
static void mul(VuMicroOperands& ops);
|
||||
static void mula(VuMicroOperands& ops);
|
||||
static void mulabc(VuMicroOperands& ops);
|
||||
static void mulai(VuMicroOperands& ops);
|
||||
static void mulaq(VuMicroOperands& ops);
|
||||
|
||||
static std::function<void(VuMicroOperands& ops,
|
||||
std::function<void(u32 xYzW)>)> vectorizeXyZw;
|
||||
|
||||
static f32 fasterPs2VuMul(Reg rad, u32 id, Reg mul, u32 idx);
|
||||
private:
|
||||
VuMicroOrder ordered;
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
namespace cosmic::creeper {
|
||||
void VuMicroInterpreter::mr32(VuMicroOperands& ops) {
|
||||
static const std::array<u8, 4> rotates{8, 4, 2, 1};
|
||||
f32 fsX{vu->VuGPRs[ops.fs].floats[0]};
|
||||
f32 fsX{vu->vuGPRs[ops.fs].floats[0]};
|
||||
|
||||
const i32 x{ops.fd & rotates[0]};
|
||||
const i32 y{ops.fd & rotates[1]};
|
||||
@ -11,13 +11,79 @@ namespace cosmic::creeper {
|
||||
const i32 w{ops.fd & rotates[3]};
|
||||
|
||||
if (x)
|
||||
vu->VuGPRs[ops.ft].floats[0] = vu->VuGPRs[ops.fs].floats[1];
|
||||
vu->vuGPRs[ops.ft].floats[0] = vu->vuGPRs[ops.fs].floats[1];
|
||||
if (y)
|
||||
vu->VuGPRs[ops.ft].floats[1] = vu->VuGPRs[ops.fs].floats[2];
|
||||
vu->vuGPRs[ops.ft].floats[1] = vu->vuGPRs[ops.fs].floats[2];
|
||||
if (z)
|
||||
vu->VuGPRs[ops.ft].floats[2] = vu->VuGPRs[ops.fs].floats[3];
|
||||
vu->vuGPRs[ops.ft].floats[2] = vu->vuGPRs[ops.fs].floats[3];
|
||||
if (w)
|
||||
vu->VuGPRs[ops.ft].floats[3] = fsX;
|
||||
vu->vuGPRs[ops.ft].floats[3] = fsX;
|
||||
}
|
||||
// Multiplies lane `idx` of VF[mul] by an operand selected through `id`:
//   100..103 -> lane (id - 100) of VF[rad], broadcast to every lane
//   104      -> the I special register
//   105      -> the Q special register
//   other    -> lane `idx` of VF[rad]
// Both operands are handed to toSony754 as raw IEEE-754 bit patterns before
// the multiplication takes place.
f32 VuMicroInterpreter::fasterPs2VuMul(Reg rad, u32 id, Reg mul, u32 idx) {
    f32 vmuli{};
    switch (id) {
    case 100:
    case 101:
    case 102:
    case 103:
        vmuli = vu->vuGPRs[rad].floats[id - 100]; break;
    case 104:
        vmuli = vu->getSpecialReg(vu::I).hd; break;
    case 105:
        vmuli = vu->getSpecialReg(vu::Q).hd; break;
    default:
        vmuli = vu->vuGPRs[rad].floats[idx];
    }
    const f32 vbase{vu->vuGPRs[mul].floats[idx]};

    // https://fobes.dev/ps2/detecting-emu-vu-floats
    [[likely]] if (
        vmuli != 1. ||
        vbase != 1.) {
        // toSony754 inspects the exponent and sign bit fields, so it must receive
        // the bit pattern of each float; a static_cast would pass the truncated
        // numeric value instead
        return vu->toSony754(__builtin_bit_cast(u32, vmuli)) *
            vu->toSony754(__builtin_bit_cast(u32, vbase));
    }
    // NOTE(review): 1.0 * 1.0 yields 0.0 here; presumably a placeholder for the
    // hardware quirk described in the article linked above — confirm
    return {};
}
|
||||
std::function<void(VuMicroOperands&,
|
||||
std::function<void(u32)>)> VuMicroInterpreter::vectorizeXyZw{
|
||||
[](VuMicroOperands& ops, std::function<void(u32 xYzW)> callback) {
|
||||
for (u32 xYzW{}; xYzW < 4; xYzW++) {
|
||||
if (ops.field & (1 << (3 - xYzW))) {
|
||||
vu->clsMacFlags(xYzW);
|
||||
continue;
|
||||
}
|
||||
callback(xYzW);
|
||||
}
|
||||
}};
|
||||
|
||||
// These instructions are applicable to both VU0 and VU1
|
||||
// MUL: for every lane handed over by vectorizeXyZw, multiplies VF[fs] by VF[ft]
// lane-wise and stores the flag-adjusted product in VF[fd].
void VuMicroInterpreter::mul(VuMicroOperands& ops) {
    vectorizeXyZw(ops, [&](u32 lane) {
        // id 0 selects the matching lane of VF[ft] as the multiplier
        const f32 product{fasterPs2VuMul(ops.ft, 0, ops.fs, lane)};
        vu->vuGPRs[ops.fd].floats[lane] = vu->modifierMacFlags(product, lane);
    });
}
|
||||
// MULA: same lane-wise product as MUL (VF[fs] * VF[ft]), but the flag-adjusted
// result is stored in the accumulator instead of a VF register.
void VuMicroInterpreter::mula(VuMicroOperands& ops) {
    vectorizeXyZw(ops, [&](u32 lane) {
        // id 0 selects the matching lane of VF[ft] as the multiplier
        const f32 product{fasterPs2VuMul(ops.ft, 0, ops.fs, lane)};
        vu->acc.floats[lane] = vu->modifierMacFlags(product, lane);
    });
}
|
||||
// MULAbc: multiplies each handled lane of VF[fs] by the single lane `ops.bc`
// of VF[ft] and stores the flag-adjusted result in the accumulator.
void VuMicroInterpreter::mulabc(VuMicroOperands& ops) {
    vectorizeXyZw(ops, [&](u32 lane) {
        // ids 100..103 make fasterPs2VuMul broadcast lane (id - 100) of VF[ft]
        const u32 broadcastId{100 + ops.bc};
        const f32 product{fasterPs2VuMul(ops.ft, broadcastId, ops.fs, lane)};
        vu->acc.floats[lane] = vu->modifierMacFlags(product, lane);
    });
}
|
||||
// MULAi: multiplies each handled lane of VF[fs] by the I special register and
// stores the flag-adjusted result in the accumulator.
void VuMicroInterpreter::mulai(VuMicroOperands& ops) {
    vectorizeXyZw(ops, [&](u32 lane) {
        // id 104 makes fasterPs2VuMul read the I register as the multiplier
        const f32 product{fasterPs2VuMul(ops.ft, 104, ops.fs, lane)};
        vu->acc.floats[lane] = vu->modifierMacFlags(product, lane);
    });
}
|
||||
// MULAq: multiplies each handled lane of VF[fs] by the Q special register and
// stores the flag-adjusted result in the accumulator.
void VuMicroInterpreter::mulaq(VuMicroOperands& ops) {
    vectorizeXyZw(ops, [&](u32 lane) {
        // id 105 makes fasterPs2VuMul read the Q register as the multiplier
        const f32 product{fasterPs2VuMul(ops.ft, 105, ops.fs, lane)};
        vu->acc.floats[lane] = vu->modifierMacFlags(product, lane);
    });
}
|
||||
}
|
||||
|
||||
|
@ -4,10 +4,10 @@
|
||||
namespace cosmic::creeper {
|
||||
// Integer add-immediate: VI[ft] = VI[fs] + imm, where imm is the signed 5-bit
// field found at bits 6..10 of the instruction word.
void VuMicroInterpreter::iddai(VuMicroOperands& ops) {
    // Integer registers are addressed with 4 bits; mask once and use the masked
    // indices everywhere so intsRegs (16 entries) is never indexed out of range
    const auto is{ops.fs & 0xf};
    const auto it{ops.ft & 0xf};
    vu->pushIntPipe(it, is);

    auto imm{static_cast<i16>((ops.inst >> 6) & 0x1f)};
    // Sign-extend from 5 bits: bit 4 is the sign, bits 0..3 are kept as they are
    imm = static_cast<i16>((imm & 0x10 ? 0xfff0 : 0) | (imm & 0xf));

    auto vui{vu->intsRegs[is].sig + imm};
    vu->intsRegs[it] = vui;
}
|
||||
void VuMicroInterpreter::mtir(VuMicroOperands& ops) {
|
||||
@ -15,6 +15,6 @@ namespace cosmic::creeper {
|
||||
u32 id{(ops.inst >> 21) & 0x3};
|
||||
if (id > 3) {
|
||||
}
|
||||
vu->intsRegs[ops.ft & 0xf].uns = static_cast<u16>(vu->VuGPRs[ops.fs].uns[id]);
|
||||
vu->intsRegs[ops.ft & 0xf].uns = static_cast<u16>(vu->vuGPRs[ops.fs].uns[id]);
|
||||
}
|
||||
}
|
@ -88,6 +88,7 @@ namespace cosmic::ee {
|
||||
}
|
||||
// Redirects the program counter to `neoPC`, remembering the PC being left
// behind and flagging the change as a branch.
inline void chPc(u32 neoPC) {
    isABranch = true;
    // Save the outgoing PC before it is overwritten
    lastPc = eePc;
    eePc = neoPC;
}
|
||||
i64 getHtzCycles(bool total) const;
|
||||
|
@ -39,6 +39,7 @@ namespace cosmic::ee {
|
||||
SpecialSrav,
|
||||
// Note: This is our first flavor of JUMP added (Jump Register)
|
||||
SpecialJr,
|
||||
SpecialJalr,
|
||||
SpecialMovZ = 0xa,
|
||||
SpecialMovN,
|
||||
SpecialSyscall,
|
||||
|
@ -37,7 +37,7 @@ namespace cosmic::vu {
|
||||
exe = std::make_unique<creeper::VuMicroInterpreter>(*this);
|
||||
|
||||
// vf00 is hardwired to the vector {0.0, 0.0, 0.0, 1.0}
|
||||
VuGPRs[0].w = 1.0;
|
||||
vuGPRs[0].w = 1.0;
|
||||
intsRegs[0].uns = 0;
|
||||
status.isVuExecuting = false;
|
||||
clock.isDirty = false;
|
||||
@ -140,16 +140,17 @@ namespace cosmic::vu {
|
||||
spQ.hd = 0.f;
|
||||
spR.hd = 0.f;
|
||||
spP.hd = 0.f;
|
||||
acc.faster = {};
|
||||
|
||||
for (u8 gpr{0}; gpr < 32; gpr++) {
|
||||
if (gpr < 16)
|
||||
intsRegs[gpr].uns = 0;
|
||||
if (!gpr)
|
||||
continue;
|
||||
VuGPRs[gpr].floats[0] = 0;
|
||||
VuGPRs[gpr].floats[1] = 0;
|
||||
VuGPRs[gpr].floats[2] = 0;
|
||||
VuGPRs[gpr].floats[3] = 0;
|
||||
vuGPRs[gpr].floats[0] = 0;
|
||||
vuGPRs[gpr].floats[1] = 0;
|
||||
vuGPRs[gpr].floats[2] = 0;
|
||||
vuGPRs[gpr].floats[3] = 0;
|
||||
}
|
||||
|
||||
}
|
||||
@ -273,4 +274,43 @@ namespace cosmic::vu {
|
||||
vuPc += 4;
|
||||
return opcode;
|
||||
}
|
||||
|
||||
// Updates the pending MAC flags (nextFlagsPipe) for lane `index` from the result
// `val` and returns the value that has to be stored for that lane.
// Flag bits used for a lane, with flagId = 3 - index:
//   0x0001 << flagId  zero       0x0010 << flagId  sign
//   0x0100 << flagId  underflow  0x1000 << flagId  overflow
// Results with a zero exponent are flushed to a signed zero, results with an
// all-ones exponent are clamped to the largest finite magnitude.
f32 VectorUnit::modifierMacFlags(const f32 val, u32 index) {
    // The tests below look at IEEE-754 bit fields, so take the bit pattern of
    // the float rather than its numeric value converted to an integer
    auto treat{__builtin_bit_cast(u32, val)};
    const auto flagId{3 - index};

    // Test the sign bit of the float, we need to set or clear the sign flag
    if (treat & 0x8000'0000)
        nextFlagsPipe |= 0x10 << flagId;
    else // Cleaning
        nextFlagsPipe &= ~(0x10 << flagId);

    // Zero, Clear Underflow and Overflow
    if ((treat & 0x7fff'ffff) == 0) {
        nextFlagsPipe |= 1 << flagId;
        nextFlagsPipe &= ~(0x1100 << flagId);

        return val;
    }
    // https://github.com/PSI-Rockin/DobieStation/blob/master/src/core/ee/vu.cpp#L1023
    switch ((treat >> 23) & 0xff) {
    case 0: // Underflow, set zero
        nextFlagsPipe |= 0x101 << flagId;
        // Clear the overflow flag only; `|= ~mask` would raise every other flag
        nextFlagsPipe &= ~(0x1000 << flagId);
        treat &= 0x8000'0000;
        break;
    case 255: // Overflow
        nextFlagsPipe |= 0x1000 << flagId;
        nextFlagsPipe &= ~(0x101 << flagId);
        treat &= 0x8000'0000;
        treat |= 0x7f7f'ffff;
        break;
    default: // Clear all but sign
        nextFlagsPipe &= ~(0x1101 << flagId);
    }
    // Hand back the float whose bit pattern is `treat`
    return __builtin_bit_cast(f32, treat);
}
|
||||
// Clears the four pending MAC flag bits that belong to lane `index`.
void VectorUnit::clsMacFlags(u32 index) {
    // One bit per nibble, all at position (3 - index)
    const auto laneBits{0x1111 << (3 - index)};
    nextFlagsPipe &= ~laneBits;
}
|
||||
}
|
@ -99,7 +99,8 @@ namespace cosmic::vu {
|
||||
void startXgKick2Gif();
|
||||
void handleDataTransfer();
|
||||
|
||||
alignas(512) std::array<VuReg, 32> VuGPRs;
|
||||
alignas(512) std::array<VuReg, 32> vuGPRs;
|
||||
VuReg acc;
|
||||
alignas(32) std::array<VuIntReg, 16> intsRegs;
|
||||
|
||||
void establishVif(u16 conTops[2], Ref<gs::GifBridge> gif);
|
||||
@ -118,7 +119,7 @@ namespace cosmic::vu {
|
||||
|
||||
u32 fetchByPc();
|
||||
VuWorkMemory vecRegion;
|
||||
[[clang::always_inline]] void setSpecialReg(VuSpecialReg reg, const u32 uns) {
|
||||
void setSpecialReg(VuSpecialReg reg, const u32 uns) {
|
||||
if (reg == VuSpecialReg::I)
|
||||
spI.uns = uns;
|
||||
else if (reg == VuSpecialReg::Q)
|
||||
@ -128,13 +129,38 @@ namespace cosmic::vu {
|
||||
else if (reg == VuSpecialReg::P)
|
||||
spP.uns = uns;
|
||||
}
|
||||
// Returns a copy of the special register (I, Q, R or P) named by `reg`.
auto getSpecialReg(VuSpecialReg reg) const {
    switch (reg) {
    case I:
        return spI;
    case Q:
        return spQ;
    case R:
        return spR;
    case P:
        return spP;
    }
    // Only reached when `reg` holds a value outside the enumerators above;
    // return a value-initialized register instead of flowing off the end of
    // a non-void function, which is undefined behavior
    return VuRegUnique{};
}
|
||||
|
||||
private:
|
||||
VuRegUnique spI, spQ, spR, spP;
|
||||
// Converts the IEEE-754 bit pattern `value` into the float this VU works with:
//   exponent all zeros -> signed zero
//   exponent all ones  -> largest finite magnitude, sign preserved
//   anything else      -> the float with exactly those bits
[[clang::always_inline]] f32 toSony754(const u32 value) {
    constexpr u32 signBit{0x80000000};
    constexpr u32 exponentBits{0x7f800000};

    // Each result is a bit pattern, so it has to be reinterpreted as a float;
    // a static_cast would convert the integer's numeric value instead
    switch (value & exponentBits) {
    case 0:
        return __builtin_bit_cast(f32, value & signBit);
    case exponentBits:
        return __builtin_bit_cast(f32, (value & signBit) | 0x7f7fffff);
    }
    return __builtin_bit_cast(f32, value);
}
|
||||
|
||||
f32 modifierMacFlags(const f32 val, u32 index);
|
||||
void clsMacFlags(u32 index);
|
||||
VuRegUnique
|
||||
cachedQ,
|
||||
cachedP;
|
||||
|
||||
private:
|
||||
VuRegUnique spI, spQ, spR, spP;
|
||||
|
||||
std::shared_ptr<ee::EeMipsCore> ee;
|
||||
void updateMacPipeline();
|
||||
void updateDeltaCycles(i64 add, bool incCount = false);
|
||||
@ -147,7 +173,12 @@ namespace cosmic::vu {
|
||||
|
||||
std::array<u8, 4> clipFlags;
|
||||
u8 cfIndex;
|
||||
// The flags hole pipeline
|
||||
std::array<u16, 4> macFlags;
|
||||
// This variable carries the flags modified during the VU operations
|
||||
// This value will be placed in the VU pipeline as soon as possible
|
||||
// Bits 0-3: zero, 4-7: sign, 8-11: underflow, 12-15: overflow; inside each nibble, bit (3 - lane) belongs to lane X=0, Y=1, Z=2, W=3
|
||||
// modifierMacFlags, clsMacFlags
|
||||
u16 nextFlagsPipe;
|
||||
u8 mfIndex;
|
||||
u16 gifAddr;
|
||||
|
Loading…
Reference in New Issue
Block a user