From 337b34ef6a03f56da3a402fab130545833694758 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 30 Aug 2014 22:02:28 -0700 Subject: [PATCH 1/4] Eat cycles during block transfers. Tests show this takes a number very close to 1.9 times the number of bytes in cycles @222. Using that value caused a tiny fps drop in one area of the God of War demo, which may be correct but might also cause complaints. A lot of our other timing is not accurate, so using a lower value (1.6) to be on the safer side. This may cause things to reschedule more accurately when games drawsync, and improves the gpu/commands/blocktransfer test. --- GPU/Directx9/GPU_DX9.cpp | 3 +++ GPU/GLES/GLES_GPU.cpp | 3 +++ GPU/Software/SoftGpu.cpp | 3 +++ 3 files changed, 9 insertions(+) diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index d7a28a6aa..e4f71a695 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -1478,6 +1478,9 @@ void DIRECTX9_GPU::DoBlockTransfer() { dstStride == 512 && height == 272) { framebufferManager_.DrawPixels(Memory::GetPointerUnchecked(dstBasePtr), GE_FORMAT_8888, 512); } + + // TODO: Correct timing appears to be 1.9, but erring a bit low since some of our other timing is inaccurate. + cyclesExecuted += ((height * width * bpp) * 16) / 10; } void DIRECTX9_GPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type) { diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index b186ebf9d..573f1bd3e 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -2000,6 +2000,9 @@ void GLES_GPU::DoBlockTransfer() { CBreakPoints::ExecMemCheck(srcBasePtr + (srcY * srcStride + srcX) * bpp, false, height * srcStride * bpp, currentMIPS->pc); CBreakPoints::ExecMemCheck(dstBasePtr + (srcY * dstStride + srcX) * bpp, true, height * dstStride * bpp, currentMIPS->pc); #endif + + // TODO: Correct timing appears to be 1.9, but erring a bit low since some of our other timing is inaccurate. + cyclesExecuted += ((height * width * bpp) * 16) / 10; } void GLES_GPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type) { diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 2a44e5f7f..4b9ab81f9 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -626,6 +626,9 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) CBreakPoints::ExecMemCheck(dstBasePtr + (srcY * dstStride + srcX) * bpp, true, height * dstStride * bpp, currentMIPS->pc); #endif + // TODO: Correct timing appears to be 1.9, but erring a bit low since some of our other timing is inaccurate. + cyclesExecuted += ((height * width * bpp) * 16) / 10; + // Could theoretically dirty the framebuffer. framebufferDirty_ = true; break; From 514772e18ed61f5d43b6a2e923802b007f24ae8e Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 30 Aug 2014 22:06:25 -0700 Subject: [PATCH 2/4] Avoid some magic numbers. --- GPU/GLES/SoftwareTransform.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/GLES/SoftwareTransform.cpp b/GPU/GLES/SoftwareTransform.cpp index a82159abb..6c6184aa2 100644 --- a/GPU/GLES/SoftwareTransform.cpp +++ b/GPU/GLES/SoftwareTransform.cpp @@ -548,9 +548,9 @@ void TransformDrawEngine::SoftwareTransformAndDraw( glBindBuffer(GL_ARRAY_BUFFER, 0); glVertexAttribPointer(ATTR_POSITION, 4, GL_FLOAT, GL_FALSE, vertexSize, drawBuffer); int attrMask = program->attrMask; - if (attrMask & (1 << ATTR_TEXCOORD)) glVertexAttribPointer(ATTR_TEXCOORD, doTextureProjection ? 3 : 2, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + 4 * 4); - if (attrMask & (1 << ATTR_COLOR0)) glVertexAttribPointer(ATTR_COLOR0, 4, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, ((uint8_t*)drawBuffer) + 7 * 4); - if (attrMask & (1 << ATTR_COLOR1)) glVertexAttribPointer(ATTR_COLOR1, 3, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, ((uint8_t*)drawBuffer) + 8 * 4); + if (attrMask & (1 << ATTR_TEXCOORD)) glVertexAttribPointer(ATTR_TEXCOORD, doTextureProjection ? 3 : 2, GL_FLOAT, GL_FALSE, vertexSize, ((uint8_t*)drawBuffer) + offsetof(TransformedVertex, u)); + if (attrMask & (1 << ATTR_COLOR0)) glVertexAttribPointer(ATTR_COLOR0, 4, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, ((uint8_t*)drawBuffer) + offsetof(TransformedVertex, color0)); + if (attrMask & (1 << ATTR_COLOR1)) glVertexAttribPointer(ATTR_COLOR1, 3, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, ((uint8_t*)drawBuffer) + offsetof(TransformedVertex, color1)); if (drawIndexed) { #if 1 // USING_GLES2 glDrawElements(glprim[prim], numTrans, GL_UNSIGNED_SHORT, inds); From de0a914d3b91aa6c0301b4ad574766be772e22f7 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 30 Aug 2014 22:06:38 -0700 Subject: [PATCH 3/4] Fix a type conversion warning. --- UI/GameInfoCache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/UI/GameInfoCache.cpp b/UI/GameInfoCache.cpp index a5c94ed50..a2d5ec080 100644 --- a/UI/GameInfoCache.cpp +++ b/UI/GameInfoCache.cpp @@ -609,7 +609,7 @@ again: void GameInfoCache::SetupTexture(GameInfo *info, std::string &textureData, Thin3DContext *thin3d, Thin3DTexture *&tex, double &loadTime) { if (textureData.size()) { if (!tex) { - tex = thin3d->CreateTextureFromFileData(textureData.data(), textureData.size(), T3DFileType::PNG); + tex = thin3d->CreateTextureFromFileData(textureData.data(), (int)textureData.size(), T3DFileType::PNG); if (tex) { loadTime = time_now_d(); } else { From e1d2e72480a91b3df5351df9c155f516c9931206 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 30 Aug 2014 22:14:58 -0700 Subject: [PATCH 4/4] Add block transfer to null gpu. For tests, better to have it here. --- GPU/Null/NullGpu.cpp | 50 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/GPU/Null/NullGpu.cpp b/GPU/Null/NullGpu.cpp index cfd358f92..08e3db5ab 100644 --- a/GPU/Null/NullGpu.cpp +++ b/GPU/Null/NullGpu.cpp @@ -16,12 +16,14 @@ // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. -#include "NullGpu.h" -#include "../GPUState.h" -#include "../ge_constants.h" -#include "../../Core/MemMap.h" -#include "../../Core/HLE/sceKernelInterrupt.h" -#include "../../Core/HLE/sceGe.h" +#include "GPU/Null/NullGpu.h" +#include "GPU/GPUState.h" +#include "GPU/ge_constants.h" +#include "Core/Debugger/Breakpoints.h" +#include "Core/MemMap.h" +#include "Core/MIPS/MIPS.h" +#include "Core/HLE/sceKernelInterrupt.h" +#include "Core/HLE/sceGe.h" NullGPU::NullGPU() { } NullGPU::~NullGPU() { } @@ -306,10 +308,38 @@ void NullGPU::ExecuteOp(u32 op, u32 diff) { case GE_CMD_TRANSFERSTART: { - DEBUG_LOG(G3D, "DL Texture Transfer Start: PixFormat %i", data); - // TODO: Here we should check if the transfer overlaps a framebuffer or any textures, - // and take appropriate action. If not, this should just be a block transfer within - // GPU memory which could be implemented by a copy loop. + u32 srcBasePtr = gstate.getTransferSrcAddress(); + u32 srcStride = gstate.getTransferSrcStride(); + + u32 dstBasePtr = gstate.getTransferDstAddress(); + u32 dstStride = gstate.getTransferDstStride(); + + int srcX = gstate.getTransferSrcX(); + int srcY = gstate.getTransferSrcY(); + + int dstX = gstate.getTransferDstX(); + int dstY = gstate.getTransferDstY(); + + int width = gstate.getTransferWidth(); + int height = gstate.getTransferHeight(); + + int bpp = gstate.getTransferBpp(); + + DEBUG_LOG(G3D, "Block transfer: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY); + + for (int y = 0; y < height; y++) { + const u8 *src = Memory::GetPointer(srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp); + u8 *dst = Memory::GetPointer(dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp); + memcpy(dst, src, width * bpp); + } + +#ifndef MOBILE_DEVICE + CBreakPoints::ExecMemCheck(srcBasePtr + (srcY * srcStride + srcX) * bpp, false, height * srcStride * bpp, currentMIPS->pc); + CBreakPoints::ExecMemCheck(dstBasePtr + (srcY * dstStride + srcX) * bpp, true, height * dstStride * bpp, currentMIPS->pc); +#endif + + // TODO: Correct timing appears to be 1.9, but erring a bit low since some of our other timing is inaccurate. + cyclesExecuted += ((height * width * bpp) * 16) / 10; break; }