Merge pull request #6141 from unknownbrackets/gpu-blocktransfer

Fix a heap overflow in block transfers, fix most save picture downloads, optimizations
This commit is contained in:
Henrik Rydgård 2014-05-26 10:53:02 +02:00
commit eead104ebe
14 changed files with 344 additions and 151 deletions

View File

@ -104,7 +104,11 @@ static int Replace_memcpy() {
u32 destPtr = PARAM(0);
u32 srcPtr = PARAM(1);
u32 bytes = PARAM(2);
if (bytes != 0) {
bool skip = false;
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
skip = gpu->UpdateMemory(destPtr, srcPtr, bytes);
}
if (!skip && bytes != 0) {
u8 *dst = Memory::GetPointerUnchecked(destPtr);
u8 *src = Memory::GetPointerUnchecked(srcPtr);
memmove(dst, src, bytes);
@ -114,9 +118,6 @@ static int Replace_memcpy() {
CBreakPoints::ExecMemCheck(srcPtr, false, bytes, currentMIPS->pc);
CBreakPoints::ExecMemCheck(destPtr, true, bytes, currentMIPS->pc);
#endif
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
gpu->UpdateMemory(destPtr, srcPtr, bytes);
}
return 10 + bytes / 4; // approximation
}
@ -124,7 +125,11 @@ static int Replace_memcpy16() {
u32 destPtr = PARAM(0);
u32 srcPtr = PARAM(1);
u32 bytes = PARAM(2) * 16;
if (bytes != 0) {
bool skip = false;
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
skip = gpu->UpdateMemory(destPtr, srcPtr, bytes);
}
if (!skip && bytes != 0) {
u8 *dst = Memory::GetPointerUnchecked(destPtr);
u8 *src = Memory::GetPointerUnchecked(srcPtr);
memmove(dst, src, bytes);
@ -134,9 +139,6 @@ static int Replace_memcpy16() {
CBreakPoints::ExecMemCheck(srcPtr, false, bytes, currentMIPS->pc);
CBreakPoints::ExecMemCheck(destPtr, true, bytes, currentMIPS->pc);
#endif
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
gpu->UpdateMemory(destPtr, srcPtr, bytes);
}
return 10 + bytes / 4; // approximation
}
@ -144,7 +146,11 @@ static int Replace_memmove() {
u32 destPtr = PARAM(0);
u32 srcPtr = PARAM(1);
u32 bytes = PARAM(2);
if (bytes != 0) {
bool skip = false;
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
skip = gpu->UpdateMemory(destPtr, srcPtr, bytes);
}
if (!skip && bytes != 0) {
u8 *dst = Memory::GetPointerUnchecked(destPtr);
u8 *src = Memory::GetPointerUnchecked(srcPtr);
memmove(dst, src, bytes);
@ -154,9 +160,6 @@ static int Replace_memmove() {
CBreakPoints::ExecMemCheck(srcPtr, false, bytes, currentMIPS->pc);
CBreakPoints::ExecMemCheck(destPtr, true, bytes, currentMIPS->pc);
#endif
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
gpu->UpdateMemory(destPtr, srcPtr, bytes);
}
return 10 + bytes / 4; // approximation
}
@ -165,14 +168,17 @@ static int Replace_memset() {
u8 *dst = Memory::GetPointerUnchecked(destPtr);
u8 value = PARAM(1);
u32 bytes = PARAM(2);
memset(dst, value, bytes);
bool skip = false;
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(destPtr)) {
skip = gpu->UpdateMemory(destPtr, destPtr, bytes);
}
if (!skip) {
memset(dst, value, bytes);
}
RETURN(destPtr);
#ifndef MOBILE_DEVICE
CBreakPoints::ExecMemCheck(destPtr, true, bytes, currentMIPS->pc);
#endif
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(destPtr)) {
gpu->UpdateMemory(destPtr, destPtr, bytes);
}
return 10 + bytes / 4; // approximation
}

View File

@ -43,16 +43,17 @@ void __DmacDoState(PointerWrap &p) {
}
int __DmacMemcpy(u32 dst, u32 src, u32 size) {
Memory::Memcpy(dst, Memory::GetPointer(src), size);
#ifndef MOBILE_DEVICE
CBreakPoints::ExecMemCheck(src, false, size, currentMIPS->pc);
CBreakPoints::ExecMemCheck(dst, true, size, currentMIPS->pc);
#endif
src &= ~0x40000000;
dst &= ~0x40000000;
bool skip = false;
if (Memory::IsVRAMAddress(src) || Memory::IsVRAMAddress(dst)) {
gpu->UpdateMemory(dst, src, size);
skip = gpu->UpdateMemory(dst, src, size);
}
if (!skip) {
Memory::Memcpy(dst, Memory::GetPointer(src), size);
}
// This number seems strangely reproducible.

View File

@ -559,11 +559,14 @@ u32 sceKernelMemset(u32 addr, u32 fillc, u32 n)
u32 sceKernelMemcpy(u32 dst, u32 src, u32 size)
{
DEBUG_LOG(SCEKERNEL, "sceKernelMemcpy(dest=%08x, src=%08x, size=%i)", dst, src, size);
// Hm, sceDmacMemcpy seems to be the popular one for this. Ignoring for now.
// gpu->UpdateMemory(dst, src, size);
bool skip = false;
if (Memory::IsVRAMAddress(src) || Memory::IsVRAMAddress(dst)) {
skip = gpu->UpdateMemory(dst, src, size);
}
// Technically should crash if these are invalid and size > 0...
if (Memory::IsValidAddress(dst) && Memory::IsValidAddress(src) && Memory::IsValidAddress(dst + size - 1) && Memory::IsValidAddress(src + size - 1))
if (!skip && Memory::IsValidAddress(dst) && Memory::IsValidAddress(src) && Memory::IsValidAddress(dst + size - 1) && Memory::IsValidAddress(src + size - 1))
{
u8 *dstp = Memory::GetPointer(dst);
u8 *srcp = Memory::GetPointer(src);

View File

@ -1315,8 +1315,9 @@ void DIRECTX9_GPU::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationTy
framebufferManager_.UpdateFromMemory(addr, size);
}
void DIRECTX9_GPU::UpdateMemory(u32 dest, u32 src, int size) {
// Notification that `size` bytes are being copied from `src` to `dest`.
// This backend only invalidates cached data covering the destination range;
// it never performs the copy itself, so it always reports false and the
// caller is expected to move the bytes.
bool DIRECTX9_GPU::UpdateMemory(u32 dest, u32 src, int size) {
	// false == "copy not handled here".
	const bool copyHandled = false;
	InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
	return copyHandled;
}
void DIRECTX9_GPU::ClearCacheNextFrame() {

View File

@ -46,7 +46,7 @@ public:
virtual void BeginFrame();
virtual void UpdateStats();
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
virtual void UpdateMemory(u32 dest, u32 src, int size);
virtual bool UpdateMemory(u32 dest, u32 src, int size);
virtual void ClearCacheNextFrame();
virtual void DeviceLost(); // Only happens on Android. Drop all textures and shaders.

View File

@ -318,7 +318,7 @@ FramebufferManager::FramebufferManager() :
currentRenderVfb_(0),
drawPixelsTex_(0),
drawPixelsTexFormat_(GE_FORMAT_INVALID),
convBuf(0),
convBuf_(0),
draw2dprogram_(0),
postShaderProgram_(0),
plainColorLoc_(-1),
@ -327,7 +327,9 @@ FramebufferManager::FramebufferManager() :
shaderManager_(0),
usePostShader_(false),
postShaderAtOutputResolution_(false),
resized_(false)
resized_(false),
gameUsesSequentialCopies_(false),
framebufRangeEnd_(0)
#ifndef USING_GLES2
,
pixelBufObj_(0),
@ -361,7 +363,7 @@ FramebufferManager::~FramebufferManager() {
#ifndef USING_GLES2
delete [] pixelBufObj_;
#endif
delete [] convBuf;
delete [] convBuf_;
}
void FramebufferManager::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
@ -394,15 +396,18 @@ void FramebufferManager::MakePixelTexture(const u8 *srcPixels, GEBufferFormat sr
bool useConvBuf = false;
if (srcPixelFormat != GE_FORMAT_8888 || srcStride != width) {
useConvBuf = true;
if (!convBuf) {
convBuf = new u8[width * height * 4];
u32 neededSize = width * height * 4;
if (!convBuf_ || convBufSize_ < neededSize) {
delete [] convBuf_;
convBuf_ = new u8[neededSize];
convBufSize_ = neededSize;
}
for (int y = 0; y < height; y++) {
switch (srcPixelFormat) {
case GE_FORMAT_565:
{
const u16 *src = (const u16 *)srcPixels + srcStride * y;
u8 *dst = convBuf + 4 * width * y;
u8 *dst = convBuf_ + 4 * width * y;
for (int x = 0; x < width; x++)
{
u16 col = src[x];
@ -417,7 +422,7 @@ void FramebufferManager::MakePixelTexture(const u8 *srcPixels, GEBufferFormat sr
case GE_FORMAT_5551:
{
const u16 *src = (const u16 *)srcPixels + srcStride * y;
u8 *dst = convBuf + 4 * width * y;
u8 *dst = convBuf_ + 4 * width * y;
for (int x = 0; x < width; x++)
{
u16 col = src[x];
@ -432,7 +437,7 @@ void FramebufferManager::MakePixelTexture(const u8 *srcPixels, GEBufferFormat sr
case GE_FORMAT_4444:
{
const u16 *src = (const u16 *)srcPixels + srcStride * y;
u8 *dst = convBuf + 4 * width * y;
u8 *dst = convBuf_ + 4 * width * y;
for (int x = 0; x < width; x++)
{
u16 col = src[x];
@ -447,7 +452,7 @@ void FramebufferManager::MakePixelTexture(const u8 *srcPixels, GEBufferFormat sr
case GE_FORMAT_8888:
{
const u8 *src = srcPixels + srcStride * 4 * y;
u8 *dst = convBuf + 4 * width * y;
u8 *dst = convBuf_ + 4 * width * y;
memcpy(dst, src, 4 * width);
}
break;
@ -458,7 +463,7 @@ void FramebufferManager::MakePixelTexture(const u8 *srcPixels, GEBufferFormat sr
}
}
}
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, useConvBuf ? convBuf : srcPixels);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, useConvBuf ? convBuf_ : srcPixels);
}
void FramebufferManager::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
@ -833,6 +838,12 @@ void FramebufferManager::DoSetRenderFrameBuffer() {
glEnable(GL_DITHER); // why?
currentRenderVfb_ = vfb;
u32 byteSize = FramebufferByteSize(vfb);
u32 fb_address_mem = (fb_address & 0x3FFFFFFF) | 0x04000000;
if (fb_address_mem + byteSize > framebufRangeEnd_) {
framebufRangeEnd_ = fb_address_mem + byteSize;
}
INFO_LOG(SCEGE, "Creating FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->format);
// Let's check for depth buffer overlap. Might be interesting.
@ -1230,20 +1241,42 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s
#endif
}
vfb->memoryUpdated = true;
BlitFramebuffer_(nvfb, x, y, vfb, x, y, w, h, 0);
if (gameUsesSequentialCopies_) {
// Ignore the x/y/etc., read the entire thing.
x = 0;
y = 0;
w = vfb->width;
h = vfb->height;
}
if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
vfb->memoryUpdated = true;
} else {
const static int FREQUENT_SEQUENTIAL_COPIES = 3;
static int frameLastCopy = 0;
static u32 bufferLastCopy = 0;
static int copiesThisFrame = 0;
if (frameLastCopy != gpuStats.numFlips || bufferLastCopy != vfb->fb_address) {
frameLastCopy = gpuStats.numFlips;
bufferLastCopy = vfb->fb_address;
copiesThisFrame = 0;
}
if (++copiesThisFrame > FREQUENT_SEQUENTIAL_COPIES) {
gameUsesSequentialCopies_ = true;
}
}
BlitFramebuffer_(nvfb, x, y, vfb, x, y, w, h, 0, true);
// PackFramebufferSync_() - Synchronous pixel data transfer using glReadPixels
// PackFramebufferAsync_() - Asynchronous pixel data transfer using glReadPixels with PBOs
#ifdef USING_GLES2
PackFramebufferSync_(nvfb);
PackFramebufferSync_(nvfb, x, y, w, h);
#else
if (gl_extensions.PBO_ARB && gl_extensions.OES_texture_npot) {
if (!sync) {
PackFramebufferAsync_(nvfb);
} else {
PackFramebufferSync_(nvfb);
PackFramebufferSync_(nvfb, x, y, w, h);
}
}
#endif
@ -1254,7 +1287,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s
}
// TODO: If dimensions are the same, we can use glCopyImageSubData.
void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) {
void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip) {
if (!dst->fbo) {
ERROR_LOG_REPORT_ONCE(dstfbozero, SCEGE, "BlitFramebuffer_: dst->fbo == 0");
fbo_unbind();
@ -1300,6 +1333,11 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int
int dstY2 = dst->renderHeight - (h + dstY) * dstYFactor;
int dstY1 = dstY2 + h * dstYFactor;
if (flip) {
dstY1 = dst->renderHeight - dstY1;
dstY2 = dst->renderHeight - dstY2;
}
#ifdef MAY_HAVE_GLES3
fbo_bind_for_read(src->fbo);
if (!useNV) {
@ -1577,7 +1615,7 @@ void FramebufferManager::PackFramebufferAsync_(VirtualFramebuffer *vfb) {
#endif
void FramebufferManager::PackFramebufferSync_(VirtualFramebuffer *vfb) {
void FramebufferManager::PackFramebufferSync_(VirtualFramebuffer *vfb, int x, int y, int w, int h) {
if (vfb->fbo) {
fbo_bind_for_read(vfb->fbo);
} else {
@ -1600,7 +1638,13 @@ void FramebufferManager::PackFramebufferSync_(VirtualFramebuffer *vfb) {
if (!convert) {
packed = (GLubyte *)Memory::GetPointer(fb_address);
} else { // End result may be 16-bit but we are reading 32-bit, so there may not be enough space at fb_address
packed = (GLubyte *)malloc(bufSize * sizeof(GLubyte));
u32 neededSize = (u32)bufSize * sizeof(GLubyte);
if (!convBuf_ || convBufSize_ < neededSize) {
delete [] convBuf_;
convBuf_ = new u8[neededSize];
convBufSize_ = neededSize;
}
packed = convBuf_;
}
if (packed) {
@ -1614,12 +1658,12 @@ void FramebufferManager::PackFramebufferSync_(VirtualFramebuffer *vfb) {
glfmt = GL_BGRA_EXT;
}
#endif
glReadPixels(0, 0, vfb->fb_stride, vfb->height, glfmt, GL_UNSIGNED_BYTE, packed);
int byteOffset = y * vfb->fb_stride * 4;
glReadPixels(0, y, vfb->fb_stride, h, glfmt, GL_UNSIGNED_BYTE, packed + byteOffset);
// LogReadPixelsError(glGetError());
if (convert) {
ConvertFromRGBA8888(Memory::GetPointer(fb_address), packed, vfb->fb_stride, vfb->height, vfb->format);
free(packed);
ConvertFromRGBA8888(Memory::GetPointer(fb_address + byteOffset), packed + byteOffset, vfb->fb_stride, h, vfb->format);
}
}
@ -1790,14 +1834,20 @@ void FramebufferManager::UpdateFromMemory(u32 addr, int size, bool safe) {
}
}
void FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size) {
bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size) {
if (!(g_Config.iRenderingMode == FB_BUFFERED_MODE)) {
return false;
}
// MotoGP workaround
for (size_t i = 0; i < vfbs_.size(); i++) {
int bpp = vfbs_[i]->format == GE_FORMAT_8888 ? 4 : 2;
int fsize = vfbs_[i]->fb_stride * vfbs_[i]->height * (vfbs_[i]->format == GE_FORMAT_8888 ? 4 : 2);
if ((vfbs_[i]->fb_address | 0x04000000) == src && size == fsize) {
// A framebuffer matched!
knownFramebufferRAMCopies_.insert(std::pair<u32, u32>(src, dst));
if (Memory::IsVRAMAddress(src) && Memory::IsRAMAddress(dst)) {
for (size_t i = 0; i < vfbs_.size(); i++) {
int bpp = vfbs_[i]->format == GE_FORMAT_8888 ? 4 : 2;
int fsize = FramebufferByteSize(vfbs_[i]);
if (MaskedEqual(vfbs_[i]->fb_address, src) && size == fsize) {
// A framebuffer matched!
knownFramebufferRAMCopies_.insert(std::pair<u32, u32>(src, dst));
}
}
}
@ -1819,37 +1869,126 @@ void FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size) {
if (srcBuffer == dstBuffer) {
WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x", src, dst);
} else {
WARN_LOG_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x", src, dst);
WARN_LOG_REPORT_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy (not supported) %08x -> %08x", src, dst);
// Just do the blit!
// TODO: Possibly take bpp into account somehow if games are doing really crazy things?
// if (g_Config.bBlockTransferGPU) {
// BlitFramebuffer_(dstBuffer, 0, 0, srcBuffer, 0, 0, srcBuffer->width, srcBuffer->height, 0);
// }
}
Memory::Memcpy(dst, Memory::GetPointer(src), size);
return true;
} else if (dstBuffer) {
WARN_LOG_REPORT_ONCE(btucpy, G3D, "Memcpy fbo upload (not supported) %08x -> %08x", src, dst);
// Here we should just draw the pixels into the buffer.
// if (g_Config.bBlockTransferGPU) {
// }
} else if (srcBuffer && g_Config.iRenderingMode == FB_BUFFERED_MODE) {
WARN_LOG_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst);
// if (g_Config.bBlockTransferGPU) {
// ReadFramebufferToMemory(srcBuffer, true, 0, 0, srcBuffer->width, srcBuffer->height);
// }
WARN_LOG_REPORT_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x", src, dst);
if (g_Config.bBlockTransferGPU) {
const u8 *srcBase = Memory::GetPointerUnchecked(src);
fbo_bind_as_render_target(dstBuffer->fbo);
glViewport(0, 0, dstBuffer->renderWidth, dstBuffer->renderHeight);
// TODO: Validate x/y/w/h based on size and offset?
DrawPixels(dstBuffer, 0, 0, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstBuffer->height);
dstBuffer->dirtyAfterDisplay = true;
if ((gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
dstBuffer->reallyDirtyAfterDisplay = true;
if (currentRenderVfb_) {
fbo_bind_as_render_target(currentRenderVfb_->fbo);
} else {
fbo_unbind();
}
glstate.viewport.restore();
gstate_c.textureChanged = TEXCHANGE_PARAMSONLY;
// This is a memcpy, let's still copy just in case.
return false;
}
return false;
} else if (srcBuffer) {
WARN_LOG_REPORT_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst);
if (g_Config.bBlockTransferGPU) {
// TODO: Validate x/y/w/h based on size and offset?
ReadFramebufferToMemory(srcBuffer, true, 0, 0, srcBuffer->width, srcBuffer->height);
}
return false;
} else {
return false;
}
}
bool FramebufferManager::NotifyBlockTransfer(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) {
// Size in bytes of the memory backing a virtual framebuffer:
// stride * height * bytes per pixel (4 for GE_FORMAT_8888, otherwise 2).
u32 FramebufferManager::FramebufferByteSize(const VirtualFramebuffer *vfb) const {
	const int bytesPerPixel = (vfb->format == GE_FORMAT_8888) ? 4 : 2;
	return vfb->fb_stride * vfb->height * bytesPerPixel;
}
// Finds the virtual framebuffers (if any) whose backing memory contains
// dstBasePtr and srcBasePtr.
//
// dstBuffer / srcBuffer are only assigned when a match is found, so callers
// must initialize them (to 0) before calling.  If several framebuffers
// contain a base pointer, the one giving the smallest row offset is chosen.
//
// On a match, the row offset of the base pointer inside the chosen
// framebuffer (in rows of stride * bpp bytes) is added to dstY / srcY so the
// coordinates become relative to the framebuffer start.  dstX / srcX are
// taken by reference but are not modified here.
void FramebufferManager::FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int bpp) const {
	// Sentinel for "no containing framebuffer found".  It is also the largest
	// u32 value, so any real row offset compares lower than it.
	const u32 NO_MATCH = (u32)-1;
	u32 dstYOffset = NO_MATCH;
	u32 srcYOffset = NO_MATCH;

	for (size_t i = 0; i < vfbs_.size(); ++i) {
		VirtualFramebuffer *vfb = vfbs_[i];
		const u32 vfb_address = 0x04000000 | vfb->fb_address;
		const u32 vfb_size = FramebufferByteSize(vfb);

		if (vfb_address <= dstBasePtr && dstBasePtr < vfb_address + vfb_size) {
			const u32 yOffset = (dstBasePtr - vfb_address) / (dstStride * bpp);
			if (yOffset < dstYOffset) {
				dstYOffset = yOffset;
				dstBuffer = vfb;
			}
		}
		if (vfb_address <= srcBasePtr && srcBasePtr < vfb_address + vfb_size) {
			const u32 yOffset = (srcBasePtr - vfb_address) / (srcStride * bpp);
			if (yOffset < srcYOffset) {
				srcYOffset = yOffset;
				srcBuffer = vfb;
			}
		}
	}

	if (dstYOffset != NO_MATCH) {
		dstY += dstYOffset;
	}
	// Was `>=`, which only fired when NO source buffer matched (shifting srcY
	// by -1) and never applied the offset for an actual match.
	if (srcYOffset != NO_MATCH) {
		srcY += srcYOffset;
	}
}
bool FramebufferManager::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) {
if (!(g_Config.iRenderingMode == FB_BUFFERED_MODE)) {
return false;
}
if (Memory::IsRAMAddress(srcBasePtr) && Memory::IsVRAMAddress(dstBasePtr)) {
// TODO: This causes glitches in Tactics Ogre if we don't implement both ways (which will probably be slow...)
// The main thing this helps is videos, which will have a matching stride, and zero x/y.
if (dstStride == srcStride && dstY == 0 && dstX == 0 && srcX == 0 && srcY == 0) {
UpdateFromMemory(dstBasePtr, (dstY + height) * dstStride * bpp, true);
// Skip checking if there's no framebuffers in that area.
if (!MayIntersectFramebuffer(srcBasePtr) && !MayIntersectFramebuffer(dstBasePtr)) {
return false;
}
VirtualFramebuffer *dstBuffer = 0;
VirtualFramebuffer *srcBuffer = 0;
FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, bpp);
if (dstBuffer && srcBuffer) {
if (srcBuffer == dstBuffer) {
WARN_LOG_REPORT_ONCE(dstsrc, G3D, "Intra-buffer block transfer (not supported) %08x -> %08x", srcBasePtr, dstBasePtr);
} else {
WARN_LOG_ONCE(dstnotsrc, G3D, "Inter-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr);
// Just do the blit!
if (g_Config.bBlockTransferGPU) {
BlitFramebuffer_(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, width, height, bpp);
return true; // No need to actually do the memory copy behind, probably.
}
}
return false;
} else if (dstBuffer) {
// Here we should just draw the pixels into the buffer. Copy first.
return false;
} else if (srcBuffer) {
WARN_LOG_ONCE(btd, G3D, "Block transfer download %08x -> %08x", srcBasePtr, dstBasePtr);
if (g_Config.bBlockTransferGPU && (srcBuffer == currentRenderVfb_ || !srcBuffer->memoryUpdated)) {
ReadFramebufferToMemory(srcBuffer, true, srcX, srcY, width, height);
}
return false; // Let the bit copy happen
} else {
return false;
}
}
void FramebufferManager::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) {
if (!(g_Config.iRenderingMode == FB_BUFFERED_MODE)) {
return;
}
// A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to
@ -1858,6 +1997,7 @@ bool FramebufferManager::NotifyBlockTransfer(u32 dstBasePtr, int dstStride, int
u32 backBuffer = PrevDisplayFramebufAddr();
u32 displayBuffer = DisplayFramebufAddr();
// TODO: Is this not handled by upload? Should we check !dstBuffer to avoid a double copy?
if (((backBuffer != 0 && dstBasePtr == backBuffer) ||
(displayBuffer != 0 && dstBasePtr == displayBuffer)) &&
dstStride == 512 && height == 272) {
@ -1865,51 +2005,32 @@ bool FramebufferManager::NotifyBlockTransfer(u32 dstBasePtr, int dstStride, int
DrawFramebuffer(Memory::GetPointerUnchecked(dstBasePtr), GE_FORMAT_8888, 512, false);
}
VirtualFramebuffer *dstBuffer = 0;
VirtualFramebuffer *srcBuffer = 0;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
const u32 vfb_address = 0x04000000 | vfb->fb_address;
const u32 vfb_size = vfb->fb_stride * vfb->height * (vfb->format == GE_FORMAT_8888 ? 4 : 2);
if (vfb_address <= dstBasePtr && dstBasePtr < vfb_address + vfb_size) {
dstY += (dstBasePtr - vfb_address) / (dstStride * bpp);
dstBuffer = vfb;
}
if (vfb_address <= srcBasePtr && srcBasePtr < vfb_address + vfb_size) {
srcY += (srcBasePtr - vfb_address) / (srcStride * bpp);
srcBuffer = vfb;
}
}
if (MayIntersectFramebuffer(srcBasePtr) || MayIntersectFramebuffer(dstBasePtr)) {
VirtualFramebuffer *dstBuffer = 0;
VirtualFramebuffer *srcBuffer = 0;
FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, bpp);
if (dstBuffer && srcBuffer) {
if (srcBuffer == dstBuffer) {
WARN_LOG_REPORT_ONCE(dstsrc, G3D, "Intra-buffer block transfer (not supported) %08x -> %08x", srcBasePtr, dstBasePtr);
} else {
WARN_LOG_ONCE(dstnotsrc, G3D, "Inter-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr);
// Just do the blit!
// TODO: Possibly take bpp into account somehow if games are doing really crazy things?
if (dstBuffer && !srcBuffer) {
WARN_LOG_REPORT_ONCE(btu, G3D, "Block transfer upload %08x -> %08x", srcBasePtr, dstBasePtr);
if (g_Config.bBlockTransferGPU) {
BlitFramebuffer_(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, width, height, bpp);
return true; // No need to actually do the memory copy behind, probably.
const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
fbo_bind_as_render_target(dstBuffer->fbo);
int dstBpp = dstBuffer->format == GE_FORMAT_8888 ? 4 : 2;
float dstXFactor = (float)bpp / dstBpp;
glViewport(0, 0, dstBuffer->renderWidth, dstBuffer->renderHeight);
DrawPixels(dstBuffer, dstX * dstXFactor, dstY, srcBase, dstBuffer->format, srcStride * dstXFactor, width * dstXFactor, height);
dstBuffer->dirtyAfterDisplay = true;
if ((gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
dstBuffer->reallyDirtyAfterDisplay = true;
if (currentRenderVfb_) {
fbo_bind_as_render_target(currentRenderVfb_->fbo);
} else {
fbo_unbind();
}
glstate.viewport.restore();
gstate_c.textureChanged = TEXCHANGE_PARAMSONLY;
}
}
return false;
} else if (dstBuffer) {
WARN_LOG_REPORT_ONCE(btu, G3D, "Block transfer upload (not supported) %08x -> %08x", srcBasePtr, dstBasePtr);
if (g_Config.bBlockTransferGPU) {
u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
DrawPixels(dstBuffer, dstX, dstY, srcBase, dstBuffer->format, srcStride * bpp, width, height);
}
// Here we should just draw the pixels into the buffer.
return false;
} else if (srcBuffer && g_Config.iRenderingMode == FB_BUFFERED_MODE) {
WARN_LOG_ONCE(btd, G3D, "Block transfer download %08x -> %08x", srcBasePtr, dstBasePtr);
if (g_Config.bBlockTransferGPU) {
ReadFramebufferToMemory(srcBuffer, true, srcX, srcY, width, height);
}
return false; // Let the bit copy happen
} else {
return false;
}
}

View File

@ -166,7 +166,8 @@ public:
// Returns true if it's sure this is a direct FBO->FBO transfer and it has already handled it.
// In that case we hardly need to actually copy the bytes in VRAM, they will be wrong anyway (unless
// read framebuffers is on, in which case this should always return false).
bool NotifyBlockTransfer(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp);
bool NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp);
void NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp);
// Reads a rectangular subregion of a framebuffer to the right position in its backing memory.
void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h);
@ -199,7 +200,17 @@ public:
}
}
void NotifyFramebufferCopy(u32 src, u32 dest, int size);
// Cheap range test used to skip iterating the framebuffer list: returns true
// only when the address falls in [PSP_GetVidMemBase(), framebufRangeEnd_).
bool MayIntersectFramebuffer(u32 start) {
	// Clear the cache/kernel bits.
	const u32 masked = start & 0x3FFFFFFF;
	// Most games only have two framebuffers at the start.
	return masked < framebufRangeEnd_ && masked >= PSP_GetVidMemBase();
}
bool NotifyFramebufferCopy(u32 src, u32 dest, int size);
void DestroyFramebuf(VirtualFramebuffer *vfb);
@ -211,6 +222,9 @@ private:
void CompileDraw2DProgram();
void DestroyDraw2DProgram();
void FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int bpp) const;
u32 FramebufferByteSize(const VirtualFramebuffer *vfb) const;
void SetNumExtraFBOs(int num);
u32 displayFramebufPtr_;
@ -227,11 +241,11 @@ private:
VirtualFramebuffer *currentRenderVfb_;
// Used by ReadFramebufferToMemory and later framebuffer block copies
void BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp);
void BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false);
#ifndef USING_GLES2
void PackFramebufferAsync_(VirtualFramebuffer *vfb);
#endif
void PackFramebufferSync_(VirtualFramebuffer *vfb);
void PackFramebufferSync_(VirtualFramebuffer *vfb, int x, int y, int w, int h);
// Used by DrawPixels
unsigned int drawPixelsTex_;
@ -239,7 +253,8 @@ private:
int drawPixelsTexW_;
int drawPixelsTexH_;
u8 *convBuf;
u8 *convBuf_;
u32 convBufSize_;
GLSLProgram *draw2dprogram_;
GLSLProgram *plainColorProgram_;
GLSLProgram *postShaderProgram_;
@ -257,7 +272,11 @@ private:
bool resized_;
bool useBufferedRendering_;
bool updateVRAM_;
bool gameUsesSequentialCopies_;
// The range of PSP memory that may contain FBOs. So we can skip iterating.
u32 framebufRangeEnd_;
std::vector<VirtualFramebuffer *> bvfbs_; // blitting FBOs
std::map<std::pair<int, int>, FBO *> renderCopies_;

View File

@ -661,6 +661,10 @@ void GLES_GPU::ProcessEvent(GPUEvent ev) {
InvalidateCacheInternal(ev.invalidate_cache.addr, ev.invalidate_cache.size, ev.invalidate_cache.type);
break;
case GPU_EVENT_FB_MEMCPY:
UpdateMemoryInternal(ev.fb_memcpy.dst, ev.fb_memcpy.src, ev.fb_memcpy.size);
break;
default:
GPUCommon::ProcessEvent(ev);
}
@ -1908,30 +1912,31 @@ void GLES_GPU::DoBlockTransfer() {
return;
}
// Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?)
// Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them
// entirely by walking a couple of pointers...
if (srcStride == dstStride && width == srcStride) {
// Common case in God of War, let's do it all in one chunk.
u32 srcLineStartAddr = srcBasePtr + (srcY * srcStride + srcX) * bpp;
u32 dstLineStartAddr = dstBasePtr + (dstY * dstStride + dstX) * bpp;
const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr);
u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr);
memcpy(dst, src, width * height * bpp);
} else {
for (int y = 0; y < height; y++) {
u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;
// Tell the framebuffer manager to take action if possible. If it does the entire thing, let's just return.
if (!framebufferManager_.NotifyBlockTransferBefore(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp)) {
// Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?)
// Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them
// entirely by walking a couple of pointers...
if (srcStride == dstStride && width == srcStride) {
// Common case in God of War, let's do it all in one chunk.
u32 srcLineStartAddr = srcBasePtr + (srcY * srcStride + srcX) * bpp;
u32 dstLineStartAddr = dstBasePtr + (dstY * dstStride + dstX) * bpp;
const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr);
u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr);
memcpy(dst, src, width * bpp);
}
}
memcpy(dst, src, width * height * bpp);
} else {
for (int y = 0; y < height; y++) {
u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;
const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr);
u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr);
memcpy(dst, src, width * bpp);
}
}
// Tell the framebuffer manager to take action if possible. If it does the entire thing, let's just return.
if (!framebufferManager_.NotifyBlockTransfer(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp)) {
textureCache_.Invalidate(dstBasePtr + (dstY * dstStride + dstX) * bpp, height * dstStride * bpp, GPU_INVALIDATE_HINT);
framebufferManager_.NotifyBlockTransferAfter(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp);
}
#ifndef MOBILE_DEVICE
@ -1954,19 +1959,46 @@ void GLES_GPU::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType t
else
textureCache_.InvalidateAll(type);
if (type != GPU_INVALIDATE_ALL)
framebufferManager_.UpdateFromMemory(addr, size, type == GPU_INVALIDATE_SAFE);
if (type != GPU_INVALIDATE_ALL && framebufferManager_.MayIntersectFramebuffer(addr)) {
// If we're doing block transfers, we shouldn't need this, and it'll only confuse us.
// Vempire invalidates (with writeback) after drawing, but before blitting.
if (!g_Config.bBlockTransferGPU || type == GPU_INVALIDATE_SAFE) {
framebufferManager_.UpdateFromMemory(addr, size, type == GPU_INVALIDATE_SAFE);
}
}
}
void GLES_GPU::UpdateMemory(u32 dest, u32 src, int size) {
InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
// Track stray copies of a framebuffer in RAM. MotoGP does this.
if (Memory::IsVRAMAddress(src) && Memory::IsRAMAddress(dest)) {
framebufferManager_.NotifyFramebufferCopy(src, dest, size);
// Performs a memcpy that may involve a framebuffer.  The framebuffer manager
// gets the first chance to handle it; if it declines (returns false), the
// bytes are copied in emulated memory here.  Either way, cached data for the
// destination range is invalidated afterwards.
void GLES_GPU::UpdateMemoryInternal(u32 dest, u32 src, int size) {
	const bool handledByFramebuffers = framebufferManager_.NotifyFramebufferCopy(src, dest, size);
	if (!handledByFramebuffers) {
		Memory::Memcpy(dest, Memory::GetPointer(src), size);
	}
	InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
}
// Notification of a memcpy from `src` to `dest`.
// Returns true when the copy was carried out here (via UpdateMemoryInternal),
// in which case the caller should skip its own copy; returns false when only
// the destination range was invalidated and the caller must still copy.
bool GLES_GPU::UpdateMemory(u32 dest, u32 src, int size) {
	// Track stray copies of a framebuffer in RAM. MotoGP does this.
	const bool mayTouchFramebuffer = framebufferManager_.MayIntersectFramebuffer(src) || framebufferManager_.MayIntersectFramebuffer(dest);
	if (!mayTouchFramebuffer) {
		InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
		return false;
	}

	if (!IsOnSeparateCPUThread()) {
		UpdateMemoryInternal(dest, src, size);
		return true;
	}

	GPUEvent ev(GPU_EVENT_FB_MEMCPY);
	ev.fb_memcpy.dst = dest;
	ev.fb_memcpy.src = src;
	ev.fb_memcpy.size = size;
	ScheduleEvent(ev);

	// This is a memcpy, so we need to wait for it to complete.
	SyncThread();
	return true;
}
void GLES_GPU::ClearCacheNextFrame() {
textureCache_.ClearNextFrame();
}

View File

@ -44,7 +44,7 @@ public:
virtual void BeginFrame();
virtual void UpdateStats();
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
virtual void UpdateMemory(u32 dest, u32 src, int size);
virtual bool UpdateMemory(u32 dest, u32 src, int size);
virtual void ClearCacheNextFrame();
virtual void DeviceLost(); // Only happens on Android. Drop all textures and shaders.
@ -151,6 +151,7 @@ private:
void InitClearInternal();
void BeginFrameInternal();
void CopyDisplayToOutputInternal();
void UpdateMemoryInternal(u32 dest, u32 src, int size);
void InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type);
static CommandInfo cmdInfo_[256];

View File

@ -163,6 +163,7 @@ enum GPUEventType {
GPU_EVENT_INVALIDATE_CACHE,
GPU_EVENT_FINISH_EVENT_LOOP,
GPU_EVENT_SYNC_THREAD,
GPU_EVENT_FB_MEMCPY,
};
struct GPUEvent {
@ -175,6 +176,12 @@ struct GPUEvent {
int size;
GPUInvalidationType type;
} invalidate_cache;
// GPU_EVENT_FB_MEMCPY
struct {
u32 dst;
u32 src;
int size;
} fb_memcpy;
};
operator GPUEventType() const {
@ -227,7 +234,7 @@ public:
// If size = -1, invalidate everything.
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type) = 0;
// Update either RAM from VRAM, or VRAM from RAM... or even VRAM from VRAM.
virtual void UpdateMemory(u32 dest, u32 src, int size) = 0;
virtual bool UpdateMemory(u32 dest, u32 src, int size) = 0;
// Will cause the texture cache to be cleared at the start of the next frame.
virtual void ClearCacheNextFrame() = 0;

View File

@ -657,7 +657,8 @@ void NullGPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type) {
// Nothing to invalidate.
}
void NullGPU::UpdateMemory(u32 dest, u32 src, int size) {
// Memcpy notification for the null backend.  There is nothing to update, so
// it just forwards an invalidation hint for the destination range and reports
// false, meaning the copy was not performed here.
bool NullGPU::UpdateMemory(u32 dest, u32 src, int size) {
	// Nothing to update.
	const bool copyHandled = false;
	InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
	return copyHandled;
}

View File

@ -34,7 +34,7 @@ public:
virtual void CopyDisplayToOutput() {}
virtual void UpdateStats();
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
virtual void UpdateMemory(u32 dest, u32 src, int size);
virtual bool UpdateMemory(u32 dest, u32 src, int size);
virtual void ClearCacheNextFrame() {};
virtual void DeviceLost() {}

View File

@ -851,12 +851,13 @@ void SoftGPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type)
// Nothing to invalidate.
}
void SoftGPU::UpdateMemory(u32 dest, u32 src, int size)
// Memcpy notification for the software renderer.  Forwards an invalidation
// hint for the destination, flags the framebuffer as dirty, and reports
// false, meaning the copy itself was not performed here.
bool SoftGPU::UpdateMemory(u32 dest, u32 src, int size)
{
	const bool copyHandled = false;

	// Nothing to update.
	InvalidateCache(dest, size, GPU_INVALIDATE_HINT);

	// Let's just be safe.
	framebufferDirty_ = true;

	return copyHandled;
}
bool SoftGPU::FramebufferDirty() {

View File

@ -59,7 +59,7 @@ public:
virtual void CopyDisplayToOutput();
virtual void UpdateStats();
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
virtual void UpdateMemory(u32 dest, u32 src, int size);
virtual bool UpdateMemory(u32 dest, u32 src, int size);
virtual void ClearCacheNextFrame() {};
virtual void DeviceLost() {}