mirror of
https://github.com/libretro/ppsspp.git
synced 2025-01-19 07:04:45 +00:00
Centralize EstimatePerVertexCost and Execute_BlockTransferStart
This commit is contained in:
parent
9400238da6
commit
bd4436c6eb
@ -22,6 +22,7 @@
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/Common/GPUDebugInterface.h"
|
||||
#include "GPU/Common/VertexDecoderCommon.h"
|
||||
|
||||
@ -56,6 +57,36 @@ public:
|
||||
|
||||
std::vector<std::string> DebugGetVertexLoaderIDs();
|
||||
std::string DebugGetVertexLoaderString(std::string id, DebugShaderStringType stringType);
|
||||
|
||||
int EstimatePerVertexCost() {
|
||||
// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
|
||||
// runs in parallel with transform.
|
||||
|
||||
// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.
|
||||
|
||||
// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
|
||||
// went too fast and starts doing all the work over again).
|
||||
|
||||
int cost = 20;
|
||||
if (gstate.isLightingEnabled()) {
|
||||
cost += 10;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (gstate.isLightChanEnabled(i))
|
||||
cost += 10;
|
||||
}
|
||||
}
|
||||
|
||||
if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) {
|
||||
cost += 20;
|
||||
}
|
||||
int morphCount = gstate.getNumMorphWeights();
|
||||
if (morphCount > 1) {
|
||||
cost += 5 * morphCount;
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Preprocessing for spline/bezier
|
||||
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType);
|
||||
|
@ -135,36 +135,6 @@ public:
|
||||
void SetupVertexDecoder(u32 vertType);
|
||||
void SetupVertexDecoderInternal(u32 vertType);
|
||||
|
||||
// This requires a SetupVertexDecoder call first.
|
||||
int EstimatePerVertexCost() {
|
||||
// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
|
||||
// runs in parallel with transform.
|
||||
|
||||
// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.
|
||||
|
||||
// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
|
||||
// went too fast and starts doing all the work over again).
|
||||
|
||||
int cost = 20;
|
||||
if (gstate.isLightingEnabled()) {
|
||||
cost += 10;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (gstate.isLightChanEnabled(i))
|
||||
cost += 10;
|
||||
}
|
||||
}
|
||||
|
||||
if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) {
|
||||
cost += 20;
|
||||
}
|
||||
if (dec_ && dec_->morphcount > 1) {
|
||||
cost += 5 * dec_->morphcount;
|
||||
}
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
||||
// So that this can be inlined
|
||||
void Flush() {
|
||||
if (!numDrawCalls)
|
||||
|
@ -332,7 +332,7 @@ static const CommandTableEntry commandTable[] = {
|
||||
|
||||
// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
|
||||
{GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, &GPU_DX9::Execute_LoadClut},
|
||||
{GE_CMD_TRANSFERSTART, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC},
|
||||
{GE_CMD_TRANSFERSTART, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC, &GPU_DX9::Execute_BlockTransferStart},
|
||||
|
||||
// We don't use the dither table.
|
||||
{GE_CMD_DITH0},
|
||||
@ -1311,7 +1311,6 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
|
||||
Execute_Prim(op, diff);
|
||||
break;
|
||||
|
||||
// The arrow and other rotary items in Puzbob are bezier patches, strangely enough.
|
||||
case GE_CMD_BEZIER:
|
||||
Execute_Bezier(op, diff);
|
||||
break;
|
||||
@ -1452,17 +1451,9 @@ void GPU_DX9::Execute_Generic(u32 op, u32 diff) {
|
||||
case GE_CMD_TRANSFERSIZE:
|
||||
break;
|
||||
|
||||
case GE_CMD_TRANSFERSTART: // Orphis calls this TRXKICK
|
||||
{
|
||||
// TODO: Here we should check if the transfer overlaps a framebuffer or any textures,
|
||||
// and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa.
|
||||
// Can we skip this entirely on SkipDraw? It skips some things internally.
|
||||
DoBlockTransfer(gstate_c.skipDrawReason);
|
||||
|
||||
// Fixes Gran Turismo's funky text issue, since it overwrites the current texture.
|
||||
gstate_c.textureChanged = TEXCHANGE_UPDATED;
|
||||
break;
|
||||
}
|
||||
case GE_CMD_TRANSFERSTART:
|
||||
Execute_BlockTransferStart(op, diff);
|
||||
break;
|
||||
|
||||
case GE_CMD_TEXSIZE0:
|
||||
Execute_TexSize0(op, diff);
|
||||
|
@ -272,7 +272,7 @@ static void ConvertProjMatrixToD3DThrough(Matrix4x4 &in) {
|
||||
in.translateAndScale(Vec3(xoff, yoff, 0.5f), Vec3(1.0f, 1.0f, 0.5f));
|
||||
}
|
||||
|
||||
void ShaderManagerDX9::PSUpdateUniforms(int dirtyUniforms) {
|
||||
void ShaderManagerDX9::PSUpdateUniforms(u32 dirtyUniforms) {
|
||||
if (dirtyUniforms & DIRTY_TEXENV) {
|
||||
PSSetColorUniform3(CONST_PS_TEXENV, gstate.texenvcolor);
|
||||
}
|
||||
@ -324,7 +324,7 @@ void ShaderManagerDX9::PSUpdateUniforms(int dirtyUniforms) {
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) {
|
||||
void ShaderManagerDX9::VSUpdateUniforms(u32 dirtyUniforms) {
|
||||
// Update any dirty uniforms before we draw
|
||||
if (dirtyUniforms & DIRTY_PROJMATRIX) {
|
||||
Matrix4x4 flippedMatrix;
|
||||
|
@ -137,8 +137,8 @@ public:
|
||||
std::string DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType);
|
||||
|
||||
private:
|
||||
void PSUpdateUniforms(int dirtyUniforms);
|
||||
void VSUpdateUniforms(int dirtyUniforms);
|
||||
void PSUpdateUniforms(u32 dirtyUniforms);
|
||||
void VSUpdateUniforms(u32 dirtyUniforms);
|
||||
void PSSetColorUniform3Alpha255(int creg, u32 color, u8 alpha);
|
||||
void PSSetColorUniform3(int creg, u32 color);
|
||||
void PSSetFloat(int creg, float value);
|
||||
|
@ -139,36 +139,6 @@ public:
|
||||
void SetupVertexDecoder(u32 vertType);
|
||||
inline void SetupVertexDecoderInternal(u32 vertType);
|
||||
|
||||
// This requires a SetupVertexDecoder call first.
|
||||
int EstimatePerVertexCost() {
|
||||
// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
|
||||
// runs in parallel with transform.
|
||||
|
||||
// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.
|
||||
|
||||
// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
|
||||
// went too fast and starts doing all the work over again).
|
||||
|
||||
int cost = 20;
|
||||
if (gstate.isLightingEnabled()) {
|
||||
cost += 10;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (gstate.isLightChanEnabled(i))
|
||||
cost += 10;
|
||||
}
|
||||
}
|
||||
|
||||
if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) {
|
||||
cost += 20;
|
||||
}
|
||||
if (dec_ && dec_->morphcount > 1) {
|
||||
cost += 5 * dec_->morphcount;
|
||||
}
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
||||
// So that this can be inlined
|
||||
void Flush() {
|
||||
if (!numDrawCalls)
|
||||
|
@ -335,7 +335,7 @@ static const CommandTableEntry commandTable[] = {
|
||||
|
||||
// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
|
||||
{GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_GLES::Execute_LoadClut},
|
||||
{GE_CMD_TRANSFERSTART, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC, 0, &GPU_GLES::Execute_BlockTransferStart},
|
||||
{GE_CMD_TRANSFERSTART, FLAG_FLUSHBEFORE | FLAG_EXECUTE | FLAG_READS_PC, 0, &GPUCommon::Execute_BlockTransferStart},
|
||||
|
||||
// We don't use the dither table.
|
||||
{GE_CMD_DITH0},
|
||||
@ -1512,16 +1512,6 @@ void GPU_GLES::Execute_BoneMtxData(u32 op, u32 diff) {
|
||||
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x7F);
|
||||
}
|
||||
|
||||
void GPU_GLES::Execute_BlockTransferStart(u32 op, u32 diff) {
|
||||
// TODO: Here we should check if the transfer overlaps a framebuffer or any textures,
|
||||
// and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa.
|
||||
// Can we skip this on SkipDraw?
|
||||
DoBlockTransfer(gstate_c.skipDrawReason);
|
||||
|
||||
// Fixes Gran Turismo's funky text issue, since it overwrites the current texture.
|
||||
gstate_c.textureChanged = TEXCHANGE_UPDATED;
|
||||
}
|
||||
|
||||
void GPU_GLES::Execute_Generic(u32 op, u32 diff) {
|
||||
u32 cmd = op >> 24;
|
||||
u32 data = op & 0xFFFFFF;
|
||||
@ -1543,7 +1533,6 @@ void GPU_GLES::Execute_Generic(u32 op, u32 diff) {
|
||||
Execute_Prim(op, diff);
|
||||
break;
|
||||
|
||||
// The arrow and other rotary items in Puzbob are bezier patches, strangely enough.
|
||||
case GE_CMD_BEZIER:
|
||||
Execute_Bezier(op, diff);
|
||||
break;
|
||||
@ -1692,7 +1681,7 @@ void GPU_GLES::Execute_Generic(u32 op, u32 diff) {
|
||||
case GE_CMD_TRANSFERSIZE:
|
||||
break;
|
||||
|
||||
case GE_CMD_TRANSFERSTART: // Orphis calls this TRXKICK
|
||||
case GE_CMD_TRANSFERSTART:
|
||||
Execute_BlockTransferStart(op, diff);
|
||||
break;
|
||||
|
||||
|
@ -140,7 +140,6 @@ public:
|
||||
void Execute_TgenMtxData(u32 op, u32 diff);
|
||||
void Execute_BoneMtxNum(u32 op, u32 diff);
|
||||
void Execute_BoneMtxData(u32 op, u32 diff);
|
||||
void Execute_BlockTransferStart(u32 op, u32 diff);
|
||||
|
||||
// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
|
||||
std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
|
||||
|
@ -547,12 +547,6 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
|
||||
glUniform2fv(u_fogcoef, 1, fogcoef);
|
||||
}
|
||||
|
||||
// Texturing
|
||||
|
||||
// If this dirty check is changed to true, Frontier Gate Boost works in texcoord speedhack mode.
|
||||
// This means that it's not a flushing issue.
|
||||
// It uses GE_TEXMAP_TEXTURE_MATRIX with GE_PROJMAP_UV a lot.
|
||||
// Can't figure out why it doesn't dirty at the right points though...
|
||||
if (dirty & DIRTY_UVSCALEOFFSET) {
|
||||
const float invW = 1.0f / (float)gstate_c.curTextureWidth;
|
||||
const float invH = 1.0f / (float)gstate_c.curTextureHeight;
|
||||
|
@ -1153,6 +1153,16 @@ void GPUCommon::Execute_BoundingBox(u32 op, u32 diff) {
|
||||
}
|
||||
}
|
||||
|
||||
void GPUCommon::Execute_BlockTransferStart(u32 op, u32 diff) {
|
||||
// TODO: Here we should check if the transfer overlaps a framebuffer or any textures,
|
||||
// and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa.
|
||||
// Can we skip this on SkipDraw?
|
||||
DoBlockTransfer(gstate_c.skipDrawReason);
|
||||
|
||||
// Fixes Gran Turismo's funky text issue, since it overwrites the current texture.
|
||||
gstate_c.textureChanged = TEXCHANGE_UPDATED;
|
||||
}
|
||||
|
||||
void GPUCommon::ExecuteOp(u32 op, u32 diff) {
|
||||
const u32 cmd = op >> 24;
|
||||
|
||||
|
@ -83,6 +83,7 @@ public:
|
||||
void Execute_Bezier(u32 op, u32 diff);
|
||||
void Execute_Spline(u32 op, u32 diff);
|
||||
void Execute_BoundingBox(u32 op, u32 diff);
|
||||
void Execute_BlockTransferStart(u32 op, u32 diff);
|
||||
|
||||
u64 GetTickEstimate() override {
|
||||
#if defined(_M_X64) || defined(__ANDROID__)
|
||||
|
@ -393,6 +393,7 @@ struct GPUgstate {
|
||||
bool isModeThrough() const { return (vertType & GE_VTYPE_THROUGH) != 0; }
|
||||
bool areNormalsReversed() const { return reversenormals & 1; }
|
||||
bool isSkinningEnabled() const { return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE); }
|
||||
int getNumMorphWeights() const { return ((vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT) + 1; }
|
||||
|
||||
GEPatchPrimType getPatchPrimitiveType() const { return static_cast<GEPatchPrimType>(patchprimitive & 3); }
|
||||
bool isPatchNormalsReversed() const { return patchfacing & 1; }
|
||||
|
@ -135,7 +135,6 @@ void GeDisassembleOp(u32 pc, u32 op, u32 prev, char *buffer, int bufsize) {
|
||||
}
|
||||
break;
|
||||
|
||||
// The arrow and other rotary items in Puzbob are bezier patches, strangely enough.
|
||||
case GE_CMD_BEZIER:
|
||||
{
|
||||
int bz_ucount = data & 0xFF;
|
||||
@ -521,7 +520,7 @@ void GeDisassembleOp(u32 pc, u32 op, u32 prev, char *buffer, int bufsize) {
|
||||
break;
|
||||
}
|
||||
|
||||
case GE_CMD_TRANSFERSTART: // Orphis calls this TRXKICK
|
||||
case GE_CMD_TRANSFERSTART:
|
||||
if (data & ~1)
|
||||
snprintf(buffer, bufsize, "Block transfer start: %d (extra %x)", data & 1, data & ~1);
|
||||
else
|
||||
|
@ -77,7 +77,6 @@ void NullGPU::ExecuteOp(u32 op, u32 diff) {
|
||||
}
|
||||
break;
|
||||
|
||||
// The arrow and other rotary items in Puzbob are bezier patches, strangely enough.
|
||||
case GE_CMD_BEZIER:
|
||||
{
|
||||
int bz_ucount = data & 0xFF;
|
||||
@ -92,7 +91,6 @@ void NullGPU::ExecuteOp(u32 op, u32 diff) {
|
||||
int sp_vcount = (data >> 8) & 0xFF;
|
||||
int sp_utype = (data >> 16) & 0x3;
|
||||
int sp_vtype = (data >> 18) & 0x3;
|
||||
//drawSpline(sp_ucount, sp_vcount, sp_utype, sp_vtype);
|
||||
DEBUG_LOG(G3D,"DL DRAW SPLINE: %i x %i, %i x %i", sp_ucount, sp_vcount, sp_utype, sp_vtype);
|
||||
}
|
||||
break;
|
||||
|
@ -21,7 +21,7 @@
|
||||
#include "GPU/Common/GPUDebugInterface.h"
|
||||
#include "thin3d/thin3d.h"
|
||||
|
||||
typedef struct {
|
||||
struct FormatBuffer {
|
||||
union {
|
||||
u8 *data;
|
||||
u16 *as16;
|
||||
@ -43,7 +43,7 @@ typedef struct {
|
||||
inline u32 Get32(int x, int y, int stride) {
|
||||
return as32[x + y * stride];
|
||||
}
|
||||
} FormatBuffer;
|
||||
};
|
||||
|
||||
class ShaderManagerGLES;
|
||||
|
||||
|
@ -94,36 +94,6 @@ public:
|
||||
void SetupVertexDecoder(u32 vertType);
|
||||
void SetupVertexDecoderInternal(u32 vertType);
|
||||
|
||||
// This requires a SetupVertexDecoder call first.
|
||||
int EstimatePerVertexCost() {
|
||||
// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
|
||||
// runs in parallel with transform.
|
||||
|
||||
// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.
|
||||
|
||||
// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
|
||||
// went too fast and starts doing all the work over again).
|
||||
|
||||
int cost = 20;
|
||||
if (gstate.isLightingEnabled()) {
|
||||
cost += 10;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (gstate.isLightChanEnabled(i))
|
||||
cost += 10;
|
||||
}
|
||||
}
|
||||
|
||||
if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) {
|
||||
cost += 20;
|
||||
}
|
||||
if (dec_ && dec_->morphcount > 1) {
|
||||
cost += 5 * dec_->morphcount;
|
||||
}
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
||||
// So that this can be inlined
|
||||
void Flush(VkCommandBuffer cmd) {
|
||||
if (!numDrawCalls)
|
||||
|
@ -1301,16 +1301,6 @@ void GPU_Vulkan::Execute_BoneMtxData(u32 op, u32 diff) {
|
||||
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x7F);
|
||||
}
|
||||
|
||||
void GPU_Vulkan::Execute_BlockTransferStart(u32 op, u32 diff) {
|
||||
// TODO: Here we should check if the transfer overlaps a framebuffer or any textures,
|
||||
// and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa.
|
||||
// Can we skip this on SkipDraw?
|
||||
DoBlockTransfer(gstate_c.skipDrawReason);
|
||||
|
||||
// Fixes Gran Turismo's funky text issue, since it overwrites the current texture.
|
||||
gstate_c.textureChanged = TEXCHANGE_UPDATED;
|
||||
}
|
||||
|
||||
void GPU_Vulkan::Execute_Generic(u32 op, u32 diff) {
|
||||
u32 cmd = op >> 24;
|
||||
u32 data = op & 0xFFFFFF;
|
||||
@ -1332,7 +1322,6 @@ void GPU_Vulkan::Execute_Generic(u32 op, u32 diff) {
|
||||
Execute_Prim(op, diff);
|
||||
break;
|
||||
|
||||
// The arrow and other rotary items in Puzbob are bezier patches, strangely enough.
|
||||
case GE_CMD_BEZIER:
|
||||
Execute_Bezier(op, diff);
|
||||
break;
|
||||
@ -1481,7 +1470,7 @@ void GPU_Vulkan::Execute_Generic(u32 op, u32 diff) {
|
||||
case GE_CMD_TRANSFERSIZE:
|
||||
break;
|
||||
|
||||
case GE_CMD_TRANSFERSTART: // Orphis calls this TRXKICK
|
||||
case GE_CMD_TRANSFERSTART:
|
||||
Execute_BlockTransferStart(op, diff);
|
||||
break;
|
||||
|
||||
|
@ -127,7 +127,6 @@ public:
|
||||
void Execute_TgenMtxData(u32 op, u32 diff);
|
||||
void Execute_BoneMtxNum(u32 op, u32 diff);
|
||||
void Execute_BoneMtxData(u32 op, u32 diff);
|
||||
void Execute_BlockTransferStart(u32 op, u32 diff);
|
||||
|
||||
// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
|
||||
std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
|
||||
|
@ -674,7 +674,7 @@ QString Debugger_DisplayList::DisassembleOp(u32 pc, u32 op, u32 prev, const GPUg
|
||||
return QString("Block Transfer Rect Size: %1 x %2").arg(w).arg(h);
|
||||
}
|
||||
|
||||
case GE_CMD_TRANSFERSTART: // Orphis calls this TRXKICK
|
||||
case GE_CMD_TRANSFERSTART:
|
||||
{
|
||||
return QString("Block Transfer Start : %1").arg(data ? "32-bit texel size" : "16-bit texel size");
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user