softgpu: Correct matrix value update wrapping.

The values read back when saving a context or getting matrix data are set
differently than the actual values used for rendering.

This implements the wrapping and bleeding between matrices within softgpu,
but leaves hardware rendering to only use the rendering registers for
speed.
This commit is contained in:
Unknown W. Brackets 2022-09-27 22:29:55 -07:00
parent 95d2083f04
commit 6b20c0318d
7 changed files with 233 additions and 92 deletions

View File

@ -524,8 +524,10 @@ static int sceGeGetMtx(int type, u32 matrixPtr) {
return hleLogError(SCEGE, -1, "bad matrix ptr");
}
u32 *dest = (u32 *)Memory::GetPointerWriteUnchecked(matrixPtr);
if (!gpu || !gpu->GetMatrix24(GEMatrixType(type), dest))
u32_le *dest = (u32_le *)Memory::GetPointerWriteUnchecked(matrixPtr);
// Note: this reads the CPU-visible matrix values, which may differ from the actual used values.
// They only differ when more DATA commands are sent than are valid for a matrix.
if (!gpu || !gpu->GetMatrix24(GEMatrixType(type), dest, 0))
return hleLogError(SCEGE, SCE_KERNEL_ERROR_INVALID_INDEX, "invalid matrix");
return hleLogSuccessInfoI(SCEGE, 0);

View File

@ -425,6 +425,7 @@ GPUCommon::GPUCommon(GraphicsContext *gfxCtx, Draw::DrawContext *draw) :
UpdateCmdInfo();
UpdateVsyncInterval(true);
ResetMatrices();
PPGeSetDrawContext(draw);
}
@ -731,13 +732,13 @@ int GPUCommon::GetStack(int index, u32 stackPtr) {
return currentList->stackptr;
}
static void CopyMatrix24(u32 *result, float *mtx, u32 count) {
static void CopyMatrix24(u32_le *result, const float *mtx, u32 count, u32 cmdbits) {
for (u32 i = 0; i < count; ++i) {
result[i] = toFloat24(mtx[i]);
result[i] = toFloat24(mtx[i]) | cmdbits;
}
}
bool GPUCommon::GetMatrix24(GEMatrixType type, u32 *result) {
bool GPUCommon::GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) {
switch (type) {
case GE_MTX_BONE0:
case GE_MTX_BONE1:
@ -747,19 +748,19 @@ bool GPUCommon::GetMatrix24(GEMatrixType type, u32 *result) {
case GE_MTX_BONE5:
case GE_MTX_BONE6:
case GE_MTX_BONE7:
CopyMatrix24(result, gstate.boneMatrix + (type - GE_MTX_BONE0) * 12, 12);
CopyMatrix24(result, gstate.boneMatrix + (type - GE_MTX_BONE0) * 12, 12, cmdbits);
break;
case GE_MTX_TEXGEN:
CopyMatrix24(result, gstate.tgenMatrix, 12);
CopyMatrix24(result, gstate.tgenMatrix, 12, cmdbits);
break;
case GE_MTX_WORLD:
CopyMatrix24(result, gstate.worldMatrix, 12);
CopyMatrix24(result, gstate.worldMatrix, 12, cmdbits);
break;
case GE_MTX_VIEW:
CopyMatrix24(result, gstate.viewMatrix, 12);
CopyMatrix24(result, gstate.viewMatrix, 12, cmdbits);
break;
case GE_MTX_PROJECTION:
CopyMatrix24(result, gstate.projMatrix, 16);
CopyMatrix24(result, gstate.projMatrix, 16, cmdbits);
break;
default:
return false;
@ -767,6 +768,20 @@ bool GPUCommon::GetMatrix24(GEMatrixType type, u32 *result) {
return true;
}
void GPUCommon::ResetMatrices() {
// This means we restored a context, so update the visible matrix data.
for (size_t i = 0; i < ARRAY_SIZE(gstate.boneMatrix); ++i)
matrixVisible.bone[i] = toFloat24(gstate.boneMatrix[i]);
for (size_t i = 0; i < ARRAY_SIZE(gstate.worldMatrix); ++i)
matrixVisible.world[i] = toFloat24(gstate.worldMatrix[i]);
for (size_t i = 0; i < ARRAY_SIZE(gstate.viewMatrix); ++i)
matrixVisible.view[i] = toFloat24(gstate.viewMatrix[i]);
for (size_t i = 0; i < ARRAY_SIZE(gstate.projMatrix); ++i)
matrixVisible.proj[i] = toFloat24(gstate.projMatrix[i]);
for (size_t i = 0; i < ARRAY_SIZE(gstate.tgenMatrix); ++i)
matrixVisible.tgen[i] = toFloat24(gstate.tgenMatrix[i]);
}
u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer<PspGeListArgs> args, bool head) {
// TODO Check the stack values in missing arg and adjust the stack depth
@ -1389,7 +1404,7 @@ void GPUCommon::DoExecuteCall(u32 target) {
// Check for the end
if ((Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET &&
(gstate.boneMatrixNumber & 0x7F) <= 96 - 12) {
(gstate.boneMatrixNumber & 0x00FFFFFF) <= 96 - 12) {
// Yep, pretty sure this is a bone matrix call. Double check stall first.
if (target > currentList->stall || target + 12 * 4 < currentList->stall) {
FastLoadBoneMatrix(target);
@ -1923,7 +1938,7 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
(Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET &&
(target > currentList->stall || target + 12 * 4 < currentList->stall) &&
(gstate.boneMatrixNumber & 0x7F) <= 96 - 12) {
(gstate.boneMatrixNumber & 0x00FFFFFF) <= 96 - 12) {
FastLoadBoneMatrix(target);
} else {
goto bail;
@ -2167,7 +2182,7 @@ void GPUCommon::Execute_WorldMtxNum(u32 op, u32 diff) {
int i = 0;
// We must record the individual data commands while debugRecording_.
bool fastLoad = !debugRecording_;
bool fastLoad = !debugRecording_ && end > 0;
// Stalling in the middle of a matrix would be stupid, I doubt this check is necessary.
if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
fastLoad = false;
@ -2188,7 +2203,7 @@ void GPUCommon::Execute_WorldMtxNum(u32 op, u32 diff) {
}
const int count = i;
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op + count) & 0xF);
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op & 0xF) + count);
// Skip over the loaded data, it's done now.
UpdatePC(currentList->pc, currentList->pc + count * 4);
@ -2197,7 +2212,7 @@ void GPUCommon::Execute_WorldMtxNum(u32 op, u32 diff) {
void GPUCommon::Execute_WorldMtxData(u32 op, u32 diff) {
// Note: it's uncommon to get here now, see above.
int num = gstate.worldmtxnum & 0xF;
int num = gstate.worldmtxnum & 0x00FFFFFF;
u32 newVal = op << 8;
if (num < 12 && newVal != ((const u32 *)gstate.worldMatrix)[num]) {
Flush();
@ -2205,7 +2220,7 @@ void GPUCommon::Execute_WorldMtxData(u32 op, u32 diff) {
gstate_c.Dirty(DIRTY_WORLDMATRIX);
}
num++;
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0xF);
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.worldmtxdata = GE_CMD_WORLDMATRIXDATA << 24;
}
@ -2216,7 +2231,7 @@ void GPUCommon::Execute_ViewMtxNum(u32 op, u32 diff) {
const int end = 12 - (op & 0xF);
int i = 0;
bool fastLoad = !debugRecording_;
bool fastLoad = !debugRecording_ && end > 0;
if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
fastLoad = false;
}
@ -2236,7 +2251,7 @@ void GPUCommon::Execute_ViewMtxNum(u32 op, u32 diff) {
}
const int count = i;
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op + count) & 0xF);
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op & 0xF) + count);
// Skip over the loaded data, it's done now.
UpdatePC(currentList->pc, currentList->pc + count * 4);
@ -2245,7 +2260,7 @@ void GPUCommon::Execute_ViewMtxNum(u32 op, u32 diff) {
void GPUCommon::Execute_ViewMtxData(u32 op, u32 diff) {
// Note: it's uncommon to get here now, see above.
int num = gstate.viewmtxnum & 0xF;
int num = gstate.viewmtxnum & 0x00FFFFFF;
u32 newVal = op << 8;
if (num < 12 && newVal != ((const u32 *)gstate.viewMatrix)[num]) {
Flush();
@ -2253,7 +2268,7 @@ void GPUCommon::Execute_ViewMtxData(u32 op, u32 diff) {
gstate_c.Dirty(DIRTY_VIEWMATRIX);
}
num++;
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0xF);
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.viewmtxdata = GE_CMD_VIEWMATRIXDATA << 24;
}
@ -2284,7 +2299,7 @@ void GPUCommon::Execute_ProjMtxNum(u32 op, u32 diff) {
}
const int count = i;
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op + count) & 0x1F);
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op & 0xF) + count);
// Skip over the loaded data, it's done now.
UpdatePC(currentList->pc, currentList->pc + count * 4);
@ -2293,16 +2308,16 @@ void GPUCommon::Execute_ProjMtxNum(u32 op, u32 diff) {
void GPUCommon::Execute_ProjMtxData(u32 op, u32 diff) {
// Note: it's uncommon to get here now, see above.
int num = gstate.projmtxnum & 0x1F; // NOTE: Changed from 0xF to catch overflows
int num = gstate.projmtxnum & 0x00FFFFFF;
u32 newVal = op << 8;
if (num < 0x10 && newVal != ((const u32 *)gstate.projMatrix)[num]) {
if (num < 16 && newVal != ((const u32 *)gstate.projMatrix)[num]) {
Flush();
((u32 *)gstate.projMatrix)[num] = newVal;
gstate_c.Dirty(DIRTY_PROJMATRIX);
}
num++;
if (num <= 16)
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0xF);
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.projmtxdata = GE_CMD_PROJMATRIXDATA << 24;
}
@ -2313,7 +2328,7 @@ void GPUCommon::Execute_TgenMtxNum(u32 op, u32 diff) {
const int end = 12 - (op & 0xF);
int i = 0;
bool fastLoad = !debugRecording_;
bool fastLoad = !debugRecording_ && end > 0;
if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
fastLoad = false;
}
@ -2333,7 +2348,7 @@ void GPUCommon::Execute_TgenMtxNum(u32 op, u32 diff) {
}
const int count = i;
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op + count) & 0xF);
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op & 0xF) + count);
// Skip over the loaded data, it's done now.
UpdatePC(currentList->pc, currentList->pc + count * 4);
@ -2342,7 +2357,7 @@ void GPUCommon::Execute_TgenMtxNum(u32 op, u32 diff) {
void GPUCommon::Execute_TgenMtxData(u32 op, u32 diff) {
// Note: it's uncommon to get here now, see above.
int num = gstate.texmtxnum & 0xF;
int num = gstate.texmtxnum & 0x00FFFFFF;
u32 newVal = op << 8;
if (num < 12 && newVal != ((const u32 *)gstate.tgenMatrix)[num]) {
Flush();
@ -2350,7 +2365,7 @@ void GPUCommon::Execute_TgenMtxData(u32 op, u32 diff) {
gstate_c.Dirty(DIRTY_TEXMATRIX | DIRTY_FRAGMENTSHADER_STATE); // We check the matrix to see if we need projection
}
num++;
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0xF);
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.texmtxdata = GE_CMD_TGENMATRIXDATA << 24;
}
@ -2400,7 +2415,7 @@ void GPUCommon::Execute_BoneMtxNum(u32 op, u32 diff) {
}
const int count = i;
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op + count) & 0x7F);
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op & 0x7F) + count);
// Skip over the loaded data, it's done now.
UpdatePC(currentList->pc, currentList->pc + count * 4);
@ -2409,7 +2424,7 @@ void GPUCommon::Execute_BoneMtxNum(u32 op, u32 diff) {
void GPUCommon::Execute_BoneMtxData(u32 op, u32 diff) {
// Note: it's uncommon to get here now, see above.
int num = gstate.boneMatrixNumber & 0x7F;
int num = gstate.boneMatrixNumber & 0x00FFFFFF;
u32 newVal = op << 8;
if (num < 96 && newVal != ((const u32 *)gstate.boneMatrix)[num]) {
// Bone matrices should NOT flush when software skinning is enabled!
@ -2422,7 +2437,7 @@ void GPUCommon::Execute_BoneMtxData(u32 op, u32 diff) {
((u32 *)gstate.boneMatrix)[num] = newVal;
}
num++;
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x7F);
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.boneMatrixData = GE_CMD_BONEMATRIXDATA << 24;
}
@ -2661,7 +2676,7 @@ struct DisplayList_v2 {
};
void GPUCommon::DoState(PointerWrap &p) {
auto s = p.Section("GPUCommon", 1, 4);
auto s = p.Section("GPUCommon", 1, 5);
if (!s)
return;
@ -2733,6 +2748,10 @@ void GPUCommon::DoState(PointerWrap &p) {
Do(p, isbreak);
Do(p, drawCompleteTicks);
Do(p, busyTicks);
if (s >= 5) {
Do(p, matrixVisible.all);
}
}
void GPUCommon::InterruptStart(int listid) {

View File

@ -111,7 +111,8 @@ public:
int ListSync(int listid, int mode) override;
u32 DrawSync(int mode) override;
int GetStack(int index, u32 stackPtr) override;
bool GetMatrix24(GEMatrixType type, u32 *result) override;
bool GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) override;
void ResetMatrices() override;
void DoState(PointerWrap &p) override;
bool BusyDrawing() override;
u32 Continue() override;
@ -366,6 +367,21 @@ protected:
uint32_t immFlags_ = 0;
bool immFirstSent_ = false;
// When matrix data overflows, the CPU visible values wrap and bleed between matrices.
// But this doesn't actually change the values used by rendering.
// The CPU visible values affect the GPU when list contexts are restored.
// Note: not maintained by all backends, here for save stating.
//
// Layout: the named arrays and `all` share the same storage, so `all` walks
// bone, world, view, proj and tgen back to back in that order.
union {
struct {
// 8 bone matrices, 12 entries each.
u32 bone[12 * 8];
u32 world[12];
u32 view[12];
// The projection matrix is the only one with 16 entries.
u32 proj[16];
u32 tgen[12];
};
// Flat view over every entry above; its size is the sum of the named arrays.
u32 all[12 * 8 + 12 + 12 + 16 + 12];
} matrixVisible;
std::string reportingPrimaryInfo_;
std::string reportingFullInfo_;

View File

@ -198,7 +198,8 @@ public:
virtual u32 Continue() = 0;
virtual u32 Break(int mode) = 0;
virtual int GetStack(int index, u32 stackPtr) = 0;
virtual bool GetMatrix24(GEMatrixType type, u32 *result) = 0;
virtual bool GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) = 0;
virtual void ResetMatrices() = 0;
virtual void InterruptStart(int listid) = 0;
virtual void InterruptEnd(int listid) = 0;

View File

@ -24,6 +24,7 @@
#include "Core/System.h"
#include "Core/MemMap.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUInterface.h"
#include "GPU/GPUState.h"
#ifdef _M_SSE
@ -85,11 +86,20 @@ static const CmdRange contextCmdRanges[] = {
// Skip: {0xFA, 0xFF},
};
static u32_le *SaveMatrix(u32_le *cmds, const float *mtx, int sz, int numcmd, int datacmd) {
static u32_le *SaveMatrix(u32_le *cmds, GEMatrixType type, int sz, int numcmd, int datacmd) {
if (!gpu)
return cmds;
*cmds++ = numcmd << 24;
for (int i = 0; i < sz; ++i) {
*cmds++ = (datacmd << 24) | toFloat24(mtx[i]);
// This saves the CPU-visible values, not the actual used ones, which may differ.
// Note that Restore overwrites both values.
if (type == GE_MTX_BONE0) {
for (int i = 0; i < 8; ++i)
gpu->GetMatrix24(GEMatrixType(GE_MTX_BONE0 + i), cmds + i * 12, datacmd << 24);
} else {
gpu->GetMatrix24(type, cmds, datacmd << 24);
}
cmds += sz;
return cmds;
}
@ -117,6 +127,9 @@ void GPUgstate::Reset() {
memset(gstate.tgenMatrix, 0, sizeof(gstate.tgenMatrix));
memset(gstate.boneMatrix, 0, sizeof(gstate.boneMatrix));
if (gpu)
gpu->ResetMatrices();
savedContextVersion = 1;
}
@ -152,11 +165,11 @@ void GPUgstate::Save(u32_le *ptr) {
memcpy(matrices, projMatrix, sizeof(projMatrix)); matrices += sizeof(projMatrix);
memcpy(matrices, tgenMatrix, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
} else {
cmds = SaveMatrix(cmds, boneMatrix, ARRAY_SIZE(boneMatrix), GE_CMD_BONEMATRIXNUMBER, GE_CMD_BONEMATRIXDATA);
cmds = SaveMatrix(cmds, worldMatrix, ARRAY_SIZE(worldMatrix), GE_CMD_WORLDMATRIXNUMBER, GE_CMD_WORLDMATRIXDATA);
cmds = SaveMatrix(cmds, viewMatrix, ARRAY_SIZE(viewMatrix), GE_CMD_VIEWMATRIXNUMBER, GE_CMD_VIEWMATRIXDATA);
cmds = SaveMatrix(cmds, projMatrix, ARRAY_SIZE(projMatrix), GE_CMD_PROJMATRIXNUMBER, GE_CMD_PROJMATRIXDATA);
cmds = SaveMatrix(cmds, tgenMatrix, ARRAY_SIZE(tgenMatrix), GE_CMD_TGENMATRIXNUMBER, GE_CMD_TGENMATRIXDATA);
cmds = SaveMatrix(cmds, GE_MTX_BONE0, ARRAY_SIZE(boneMatrix), GE_CMD_BONEMATRIXNUMBER, GE_CMD_BONEMATRIXDATA);
cmds = SaveMatrix(cmds, GE_MTX_WORLD, ARRAY_SIZE(worldMatrix), GE_CMD_WORLDMATRIXNUMBER, GE_CMD_WORLDMATRIXDATA);
cmds = SaveMatrix(cmds, GE_MTX_VIEW, ARRAY_SIZE(viewMatrix), GE_CMD_VIEWMATRIXNUMBER, GE_CMD_VIEWMATRIXDATA);
cmds = SaveMatrix(cmds, GE_MTX_PROJECTION, ARRAY_SIZE(projMatrix), GE_CMD_PROJMATRIXNUMBER, GE_CMD_PROJMATRIXDATA);
cmds = SaveMatrix(cmds, GE_MTX_TEXGEN, ARRAY_SIZE(tgenMatrix), GE_CMD_TGENMATRIXNUMBER, GE_CMD_TGENMATRIXDATA);
*cmds++ = boneMatrixNumber;
*cmds++ = worldmtxnum;
@ -199,7 +212,7 @@ void GPUgstate::FastLoadBoneMatrix(u32 addr) {
#endif
num += 12;
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x7F);
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
}
void GPUgstate::Restore(u32_le *ptr) {
@ -244,6 +257,9 @@ void GPUgstate::Restore(u32_le *ptr) {
projmtxnum = *cmds++;
texmtxnum = *cmds++;
}
if (gpu)
gpu->ResetMatrices();
}
bool vertTypeIsSkinningEnabled(u32 vertType) {

View File

@ -341,16 +341,16 @@ const SoftwareCommandTableEntry softgpuCommandTable[] = {
{ GE_CMD_DITH2, 0, SoftDirty::PIXEL_DITHER },
{ GE_CMD_DITH3, 0, SoftDirty::PIXEL_DITHER },
{ GE_CMD_WORLDMATRIXNUMBER },
{ GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_WorldMtxNum },
{ GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_WorldMtxData },
{ GE_CMD_VIEWMATRIXNUMBER },
{ GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ViewMtxNum },
{ GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ViewMtxData },
{ GE_CMD_PROJMATRIXNUMBER },
{ GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ProjMtxNum },
{ GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ProjMtxData },
// Currently not state.
{ GE_CMD_TGENMATRIXNUMBER },
{ GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_TgenMtxNum },
{ GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_TgenMtxData },
{ GE_CMD_BONEMATRIXNUMBER },
{ GE_CMD_BONEMATRIXNUMBER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_BoneMtxNum },
{ GE_CMD_BONEMATRIXDATA, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_BoneMtxData },
// Vertex Screen/Texture/Color
@ -1040,83 +1040,162 @@ void SoftGPU::Execute_VertexType(u32 op, u32 diff) {
}
}
// Stores the world matrix write index: the command id goes in the top byte and only
// the low 4 bits of op are kept. The diff argument is not used.
void SoftGPU::Execute_WorldMtxNum(u32 op, u32 diff) {
	// Setting 0xFFFFF0 will reset to 0, since everything above the low nibble is dropped.
	const u32 index = op & 0xF;
	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | index;
}
// Stores the view matrix write index: the command id goes in the top byte and only
// the low 4 bits of op are kept. The diff argument is not used.
void SoftGPU::Execute_ViewMtxNum(u32 op, u32 diff) {
	const u32 index = op & 0xF;
	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | index;
}
// Stores the projection matrix write index: the command id goes in the top byte and
// only the low 4 bits of op are kept. The diff argument is not used.
void SoftGPU::Execute_ProjMtxNum(u32 op, u32 diff) {
	const u32 index = op & 0xF;
	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | index;
}
// Stores the texgen matrix write index: the command id goes in the top byte and only
// the low 4 bits of op are kept. The diff argument is not used.
void SoftGPU::Execute_TgenMtxNum(u32 op, u32 diff) {
	const u32 index = op & 0xF;
	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | index;
}
// Stores the bone matrix write index: the command id goes in the top byte and only
// the low 7 bits of op are kept. The diff argument is not used.
void SoftGPU::Execute_BoneMtxNum(u32 op, u32 diff) {
	// Any bits set outside 0x7F are ignored, and setting them resets the internal counter.
	const u32 index = op & 0x7F;
	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | index;
}
void SoftGPU::Execute_WorldMtxData(u32 op, u32 diff) {
int num = gstate.worldmtxnum & 0xF;
u32 *target = num < 12 ? (u32 *)&gstate.worldMatrix[num] : (u32 *)&gstate.viewMatrix[num - 12];
u32 newVal = op << 8;
if (newVal != *target) {
*target = newVal;
dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;
int num = gstate.worldmtxnum & 0x00FFFFFF;
if (num < 12) {
u32 *target = (u32 *)&gstate.worldMatrix[num];
u32 newVal = op << 8;
if (newVal != *target) {
*target = newVal;
dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;
}
}
// Also update the CPU visible values, which update differently.
u32 *target = &matrixVisible.all[12 * 8 + (num & 0xF)];
*target = op & 0x00FFFFFF;
num++;
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0xF);
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.worldmtxdata = GE_CMD_WORLDMATRIXDATA << 24;
}
void SoftGPU::Execute_ViewMtxData(u32 op, u32 diff) {
int num = gstate.viewmtxnum & 0xF;
u32 *target = num < 12 ? (u32 *)&gstate.viewMatrix[num] : (u32 *)&gstate.projMatrix[num - 12];
u32 newVal = op << 8;
if (newVal != *target) {
*target = newVal;
dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;
int num = gstate.viewmtxnum & 0x00FFFFFF;
if (num < 12) {
u32 *target = (u32 *)&gstate.viewMatrix[num];
u32 newVal = op << 8;
if (newVal != *target) {
*target = newVal;
dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;
}
}
// Also update the CPU visible values, which update differently.
u32 *target = &matrixVisible.all[12 * 8 + 12 + (num & 0xF)];
*target = op & 0x00FFFFFF;
num++;
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0xF);
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.viewmtxdata = GE_CMD_VIEWMATRIXDATA << 24;
}
void SoftGPU::Execute_ProjMtxData(u32 op, u32 diff) {
int num = gstate.projmtxnum & 0xF;
u32 *target = (u32 *)&gstate.projMatrix[num];
u32 newVal = op << 8;
if (newVal != *target) {
*target = newVal;
dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;
int num = gstate.projmtxnum & 0x00FFFFFF;
if (num < 16) {
u32 *target = (u32 *)&gstate.projMatrix[num];
u32 newVal = op << 8;
if (newVal != *target) {
*target = newVal;
dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;
}
}
// Also update the CPU visible values, which update differently.
u32 *target = &matrixVisible.all[12 * 8 + 12 + 12 + (num & 0xF)];
*target = op & 0x00FFFFFF;
num++;
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0xF);
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.projmtxdata = GE_CMD_PROJMATRIXDATA << 24;
}
void SoftGPU::Execute_TgenMtxData(u32 op, u32 diff) {
int num = gstate.texmtxnum & 0xF;
u32 newVal = op << 8;
// Doesn't wrap to any other matrix.
if (num < 12 && newVal != ((const u32 *)gstate.tgenMatrix)[num]) {
((u32 *)gstate.tgenMatrix)[num] = newVal;
int num = gstate.texmtxnum & 0x00FFFFFF;
if (num < 12) {
u32 *target = (u32 *)&gstate.tgenMatrix[num];
u32 newVal = op << 8;
// No dirtying, read during vertex read.
*target = newVal;
}
// Doesn't wrap to any other matrix.
if ((num & 0xF) < 12) {
matrixVisible.tgen[num & 0xF] = op & 0x00FFFFFF;
}
num++;
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0xF);
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.texmtxdata = GE_CMD_TGENMATRIXDATA << 24;
}
void SoftGPU::Execute_BoneMtxData(u32 op, u32 diff) {
int num = gstate.boneMatrixNumber & 0x7F;
u32 *target;
int num = gstate.boneMatrixNumber & 0x00FFFFFF;
if (num < 96) {
target = (u32 *)&gstate.boneMatrix[num];
} else if (num < 96 + 12) {
target = (u32 *)&gstate.worldMatrix[num - 96];
} else if (num < 96 + 12 + 12) {
target = (u32 *)&gstate.viewMatrix[num - 96 - 12];
} else {
target = (u32 *)&gstate.projMatrix[num - 96 - 12 - 12];
u32 *target = (u32 *)&gstate.boneMatrix[num];
u32 newVal = op << 8;
// No dirtying, we read bone data during vertex read.
*target = newVal;
}
u32 newVal = op << 8;
if (newVal != *target) {
*target = newVal;
// Dirty if it overflowed. We read bone data during vertex read.
if (num >= 96)
dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;
}
// Also update the CPU visible values, which update differently.
u32 *target = &matrixVisible.all[(num & 0x7F)];
*target = op & 0x00FFFFFF;
num++;
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x7F);
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
gstate.boneMatrixData = GE_CMD_BONEMATRIXDATA << 24;
}
// Copies count entries from mtx into result, OR-ing cmdbits into each one.
// The source values are written through unchanged apart from the added bits.
static void CopyMatrix24(u32_le *result, const u32 *mtx, u32 count, u32 cmdbits) {
	const u32 *const stop = mtx + count;
	while (mtx != stop) {
		*result++ = *mtx++ | cmdbits;
	}
}
// Writes the requested matrix from matrixVisible (the CPU visible values) into result,
// OR-ing cmdbits into every entry. Projection yields 16 entries, every other matrix 12.
// Returns false, leaving result untouched, when type is not a known matrix.
bool SoftGPU::GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) {
	const u32 *source = nullptr;
	u32 entries = 12;

	switch (type) {
	case GE_MTX_BONE0:
	case GE_MTX_BONE1:
	case GE_MTX_BONE2:
	case GE_MTX_BONE3:
	case GE_MTX_BONE4:
	case GE_MTX_BONE5:
	case GE_MTX_BONE6:
	case GE_MTX_BONE7:
		// Bone matrices are stored back to back, 12 entries apiece.
		source = matrixVisible.bone + (type - GE_MTX_BONE0) * 12;
		break;
	case GE_MTX_TEXGEN:
		source = matrixVisible.tgen;
		break;
	case GE_MTX_WORLD:
		source = matrixVisible.world;
		break;
	case GE_MTX_VIEW:
		source = matrixVisible.view;
		break;
	case GE_MTX_PROJECTION:
		source = matrixVisible.proj;
		entries = 16;
		break;
	default:
		return false;
	}

	CopyMatrix24(result, source, entries, cmdbits);
	return true;
}
void SoftGPU::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) {
GPUCommon::Execute_ImmVertexAlphaPrim(op, diff);
// We won't flush as often as hardware renderers, so we want to flush right away.

View File

@ -181,12 +181,20 @@ public:
// Overridden to change flushing behavior.
void Execute_Call(u32 op, u32 diff);
void Execute_WorldMtxNum(u32 op, u32 diff);
void Execute_ViewMtxNum(u32 op, u32 diff);
void Execute_ProjMtxNum(u32 op, u32 diff);
void Execute_TgenMtxNum(u32 op, u32 diff);
void Execute_BoneMtxNum(u32 op, u32 diff);
void Execute_WorldMtxData(u32 op, u32 diff);
void Execute_ViewMtxData(u32 op, u32 diff);
void Execute_ProjMtxData(u32 op, u32 diff);
void Execute_TgenMtxData(u32 op, u32 diff);
void Execute_BoneMtxData(u32 op, u32 diff);
bool GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) override;
void Execute_ImmVertexAlphaPrim(u32 op, u32 diff);
typedef void (SoftGPU::*CmdFunc)(u32 op, u32 diff);