mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-12-11 23:53:55 +00:00
Merge pull request #6141 from unknownbrackets/gpu-blocktransfer
Fix a heap overflow in block transfers, fix most save picture downloads, optimizations
This commit is contained in:
commit
eead104ebe
@ -104,7 +104,11 @@ static int Replace_memcpy() {
|
||||
u32 destPtr = PARAM(0);
|
||||
u32 srcPtr = PARAM(1);
|
||||
u32 bytes = PARAM(2);
|
||||
if (bytes != 0) {
|
||||
bool skip = false;
|
||||
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
|
||||
skip = gpu->UpdateMemory(destPtr, srcPtr, bytes);
|
||||
}
|
||||
if (!skip && bytes != 0) {
|
||||
u8 *dst = Memory::GetPointerUnchecked(destPtr);
|
||||
u8 *src = Memory::GetPointerUnchecked(srcPtr);
|
||||
memmove(dst, src, bytes);
|
||||
@ -114,9 +118,6 @@ static int Replace_memcpy() {
|
||||
CBreakPoints::ExecMemCheck(srcPtr, false, bytes, currentMIPS->pc);
|
||||
CBreakPoints::ExecMemCheck(destPtr, true, bytes, currentMIPS->pc);
|
||||
#endif
|
||||
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
|
||||
gpu->UpdateMemory(destPtr, srcPtr, bytes);
|
||||
}
|
||||
return 10 + bytes / 4; // approximation
|
||||
}
|
||||
|
||||
@ -124,7 +125,11 @@ static int Replace_memcpy16() {
|
||||
u32 destPtr = PARAM(0);
|
||||
u32 srcPtr = PARAM(1);
|
||||
u32 bytes = PARAM(2) * 16;
|
||||
if (bytes != 0) {
|
||||
bool skip = false;
|
||||
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
|
||||
skip = gpu->UpdateMemory(destPtr, srcPtr, bytes);
|
||||
}
|
||||
if (!skip && bytes != 0) {
|
||||
u8 *dst = Memory::GetPointerUnchecked(destPtr);
|
||||
u8 *src = Memory::GetPointerUnchecked(srcPtr);
|
||||
memmove(dst, src, bytes);
|
||||
@ -134,9 +139,6 @@ static int Replace_memcpy16() {
|
||||
CBreakPoints::ExecMemCheck(srcPtr, false, bytes, currentMIPS->pc);
|
||||
CBreakPoints::ExecMemCheck(destPtr, true, bytes, currentMIPS->pc);
|
||||
#endif
|
||||
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
|
||||
gpu->UpdateMemory(destPtr, srcPtr, bytes);
|
||||
}
|
||||
return 10 + bytes / 4; // approximation
|
||||
}
|
||||
|
||||
@ -144,7 +146,11 @@ static int Replace_memmove() {
|
||||
u32 destPtr = PARAM(0);
|
||||
u32 srcPtr = PARAM(1);
|
||||
u32 bytes = PARAM(2);
|
||||
if (bytes != 0) {
|
||||
bool skip = false;
|
||||
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
|
||||
skip = gpu->UpdateMemory(destPtr, srcPtr, bytes);
|
||||
}
|
||||
if (!skip && bytes != 0) {
|
||||
u8 *dst = Memory::GetPointerUnchecked(destPtr);
|
||||
u8 *src = Memory::GetPointerUnchecked(srcPtr);
|
||||
memmove(dst, src, bytes);
|
||||
@ -154,9 +160,6 @@ static int Replace_memmove() {
|
||||
CBreakPoints::ExecMemCheck(srcPtr, false, bytes, currentMIPS->pc);
|
||||
CBreakPoints::ExecMemCheck(destPtr, true, bytes, currentMIPS->pc);
|
||||
#endif
|
||||
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
|
||||
gpu->UpdateMemory(destPtr, srcPtr, bytes);
|
||||
}
|
||||
return 10 + bytes / 4; // approximation
|
||||
}
|
||||
|
||||
@ -165,14 +168,17 @@ static int Replace_memset() {
|
||||
u8 *dst = Memory::GetPointerUnchecked(destPtr);
|
||||
u8 value = PARAM(1);
|
||||
u32 bytes = PARAM(2);
|
||||
memset(dst, value, bytes);
|
||||
bool skip = false;
|
||||
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(destPtr)) {
|
||||
skip = gpu->UpdateMemory(destPtr, destPtr, bytes);
|
||||
}
|
||||
if (!skip) {
|
||||
memset(dst, value, bytes);
|
||||
}
|
||||
RETURN(destPtr);
|
||||
#ifndef MOBILE_DEVICE
|
||||
CBreakPoints::ExecMemCheck(destPtr, true, bytes, currentMIPS->pc);
|
||||
#endif
|
||||
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(destPtr)) {
|
||||
gpu->UpdateMemory(destPtr, destPtr, bytes);
|
||||
}
|
||||
return 10 + bytes / 4; // approximation
|
||||
}
|
||||
|
||||
|
@ -43,16 +43,17 @@ void __DmacDoState(PointerWrap &p) {
|
||||
}
|
||||
|
||||
int __DmacMemcpy(u32 dst, u32 src, u32 size) {
|
||||
Memory::Memcpy(dst, Memory::GetPointer(src), size);
|
||||
#ifndef MOBILE_DEVICE
|
||||
CBreakPoints::ExecMemCheck(src, false, size, currentMIPS->pc);
|
||||
CBreakPoints::ExecMemCheck(dst, true, size, currentMIPS->pc);
|
||||
#endif
|
||||
|
||||
src &= ~0x40000000;
|
||||
dst &= ~0x40000000;
|
||||
bool skip = false;
|
||||
if (Memory::IsVRAMAddress(src) || Memory::IsVRAMAddress(dst)) {
|
||||
gpu->UpdateMemory(dst, src, size);
|
||||
skip = gpu->UpdateMemory(dst, src, size);
|
||||
}
|
||||
if (!skip) {
|
||||
Memory::Memcpy(dst, Memory::GetPointer(src), size);
|
||||
}
|
||||
|
||||
// This number seems strangely reproducible.
|
||||
|
@ -559,11 +559,14 @@ u32 sceKernelMemset(u32 addr, u32 fillc, u32 n)
|
||||
u32 sceKernelMemcpy(u32 dst, u32 src, u32 size)
|
||||
{
|
||||
DEBUG_LOG(SCEKERNEL, "sceKernelMemcpy(dest=%08x, src=%08x, size=%i)", dst, src, size);
|
||||
// Hm, sceDmacMemcpy seems to be the popular one for this. Ignoring for now.
|
||||
// gpu->UpdateMemory(dst, src, size);
|
||||
|
||||
bool skip = false;
|
||||
if (Memory::IsVRAMAddress(src) || Memory::IsVRAMAddress(dst)) {
|
||||
skip = gpu->UpdateMemory(dst, src, size);
|
||||
}
|
||||
|
||||
// Technically should crash if these are invalid and size > 0...
|
||||
if (Memory::IsValidAddress(dst) && Memory::IsValidAddress(src) && Memory::IsValidAddress(dst + size - 1) && Memory::IsValidAddress(src + size - 1))
|
||||
if (!skip && Memory::IsValidAddress(dst) && Memory::IsValidAddress(src) && Memory::IsValidAddress(dst + size - 1) && Memory::IsValidAddress(src + size - 1))
|
||||
{
|
||||
u8 *dstp = Memory::GetPointer(dst);
|
||||
u8 *srcp = Memory::GetPointer(src);
|
||||
|
@ -1315,8 +1315,9 @@ void DIRECTX9_GPU::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationTy
|
||||
framebufferManager_.UpdateFromMemory(addr, size);
|
||||
}
|
||||
|
||||
void DIRECTX9_GPU::UpdateMemory(u32 dest, u32 src, int size) {
|
||||
bool DIRECTX9_GPU::UpdateMemory(u32 dest, u32 src, int size) {
|
||||
InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
|
||||
return false;
|
||||
}
|
||||
|
||||
void DIRECTX9_GPU::ClearCacheNextFrame() {
|
||||
|
@ -46,7 +46,7 @@ public:
|
||||
virtual void BeginFrame();
|
||||
virtual void UpdateStats();
|
||||
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
|
||||
virtual void UpdateMemory(u32 dest, u32 src, int size);
|
||||
virtual bool UpdateMemory(u32 dest, u32 src, int size);
|
||||
virtual void ClearCacheNextFrame();
|
||||
virtual void DeviceLost(); // Only happens on Android. Drop all textures and shaders.
|
||||
|
||||
|
@ -318,7 +318,7 @@ FramebufferManager::FramebufferManager() :
|
||||
currentRenderVfb_(0),
|
||||
drawPixelsTex_(0),
|
||||
drawPixelsTexFormat_(GE_FORMAT_INVALID),
|
||||
convBuf(0),
|
||||
convBuf_(0),
|
||||
draw2dprogram_(0),
|
||||
postShaderProgram_(0),
|
||||
plainColorLoc_(-1),
|
||||
@ -327,7 +327,9 @@ FramebufferManager::FramebufferManager() :
|
||||
shaderManager_(0),
|
||||
usePostShader_(false),
|
||||
postShaderAtOutputResolution_(false),
|
||||
resized_(false)
|
||||
resized_(false),
|
||||
gameUsesSequentialCopies_(false),
|
||||
framebufRangeEnd_(0)
|
||||
#ifndef USING_GLES2
|
||||
,
|
||||
pixelBufObj_(0),
|
||||
@ -361,7 +363,7 @@ FramebufferManager::~FramebufferManager() {
|
||||
#ifndef USING_GLES2
|
||||
delete [] pixelBufObj_;
|
||||
#endif
|
||||
delete [] convBuf;
|
||||
delete [] convBuf_;
|
||||
}
|
||||
|
||||
void FramebufferManager::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
|
||||
@ -394,15 +396,18 @@ void FramebufferManager::MakePixelTexture(const u8 *srcPixels, GEBufferFormat sr
|
||||
bool useConvBuf = false;
|
||||
if (srcPixelFormat != GE_FORMAT_8888 || srcStride != width) {
|
||||
useConvBuf = true;
|
||||
if (!convBuf) {
|
||||
convBuf = new u8[width * height * 4];
|
||||
u32 neededSize = width * height * 4;
|
||||
if (!convBuf_ || convBufSize_ < neededSize) {
|
||||
delete [] convBuf_;
|
||||
convBuf_ = new u8[neededSize];
|
||||
convBufSize_ = neededSize;
|
||||
}
|
||||
for (int y = 0; y < height; y++) {
|
||||
switch (srcPixelFormat) {
|
||||
case GE_FORMAT_565:
|
||||
{
|
||||
const u16 *src = (const u16 *)srcPixels + srcStride * y;
|
||||
u8 *dst = convBuf + 4 * width * y;
|
||||
u8 *dst = convBuf_ + 4 * width * y;
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
u16 col = src[x];
|
||||
@ -417,7 +422,7 @@ void FramebufferManager::MakePixelTexture(const u8 *srcPixels, GEBufferFormat sr
|
||||
case GE_FORMAT_5551:
|
||||
{
|
||||
const u16 *src = (const u16 *)srcPixels + srcStride * y;
|
||||
u8 *dst = convBuf + 4 * width * y;
|
||||
u8 *dst = convBuf_ + 4 * width * y;
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
u16 col = src[x];
|
||||
@ -432,7 +437,7 @@ void FramebufferManager::MakePixelTexture(const u8 *srcPixels, GEBufferFormat sr
|
||||
case GE_FORMAT_4444:
|
||||
{
|
||||
const u16 *src = (const u16 *)srcPixels + srcStride * y;
|
||||
u8 *dst = convBuf + 4 * width * y;
|
||||
u8 *dst = convBuf_ + 4 * width * y;
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
u16 col = src[x];
|
||||
@ -447,7 +452,7 @@ void FramebufferManager::MakePixelTexture(const u8 *srcPixels, GEBufferFormat sr
|
||||
case GE_FORMAT_8888:
|
||||
{
|
||||
const u8 *src = srcPixels + srcStride * 4 * y;
|
||||
u8 *dst = convBuf + 4 * width * y;
|
||||
u8 *dst = convBuf_ + 4 * width * y;
|
||||
memcpy(dst, src, 4 * width);
|
||||
}
|
||||
break;
|
||||
@ -458,7 +463,7 @@ void FramebufferManager::MakePixelTexture(const u8 *srcPixels, GEBufferFormat sr
|
||||
}
|
||||
}
|
||||
}
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, useConvBuf ? convBuf : srcPixels);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, useConvBuf ? convBuf_ : srcPixels);
|
||||
}
|
||||
|
||||
void FramebufferManager::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
|
||||
@ -833,6 +838,12 @@ void FramebufferManager::DoSetRenderFrameBuffer() {
|
||||
glEnable(GL_DITHER); // why?
|
||||
currentRenderVfb_ = vfb;
|
||||
|
||||
u32 byteSize = FramebufferByteSize(vfb);
|
||||
u32 fb_address_mem = (fb_address & 0x3FFFFFFF) | 0x04000000;
|
||||
if (fb_address_mem + byteSize > framebufRangeEnd_) {
|
||||
framebufRangeEnd_ = fb_address_mem + byteSize;
|
||||
}
|
||||
|
||||
INFO_LOG(SCEGE, "Creating FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->format);
|
||||
|
||||
// Let's check for depth buffer overlap. Might be interesting.
|
||||
@ -1230,20 +1241,42 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s
|
||||
#endif
|
||||
}
|
||||
|
||||
vfb->memoryUpdated = true;
|
||||
BlitFramebuffer_(nvfb, x, y, vfb, x, y, w, h, 0);
|
||||
if (gameUsesSequentialCopies_) {
|
||||
// Ignore the x/y/etc., read the entire thing.
|
||||
x = 0;
|
||||
y = 0;
|
||||
w = vfb->width;
|
||||
h = vfb->height;
|
||||
}
|
||||
if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
|
||||
vfb->memoryUpdated = true;
|
||||
} else {
|
||||
const static int FREQUENT_SEQUENTIAL_COPIES = 3;
|
||||
static int frameLastCopy = 0;
|
||||
static u32 bufferLastCopy = 0;
|
||||
static int copiesThisFrame = 0;
|
||||
if (frameLastCopy != gpuStats.numFlips || bufferLastCopy != vfb->fb_address) {
|
||||
frameLastCopy = gpuStats.numFlips;
|
||||
bufferLastCopy = vfb->fb_address;
|
||||
copiesThisFrame = 0;
|
||||
}
|
||||
if (++copiesThisFrame > FREQUENT_SEQUENTIAL_COPIES) {
|
||||
gameUsesSequentialCopies_ = true;
|
||||
}
|
||||
}
|
||||
BlitFramebuffer_(nvfb, x, y, vfb, x, y, w, h, 0, true);
|
||||
|
||||
// PackFramebufferSync_() - Synchronous pixel data transfer using glReadPixels
|
||||
// PackFramebufferAsync_() - Asynchronous pixel data transfer using glReadPixels with PBOs
|
||||
|
||||
#ifdef USING_GLES2
|
||||
PackFramebufferSync_(nvfb);
|
||||
PackFramebufferSync_(nvfb, x, y, w, h);
|
||||
#else
|
||||
if (gl_extensions.PBO_ARB && gl_extensions.OES_texture_npot) {
|
||||
if (!sync) {
|
||||
PackFramebufferAsync_(nvfb);
|
||||
} else {
|
||||
PackFramebufferSync_(nvfb);
|
||||
PackFramebufferSync_(nvfb, x, y, w, h);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -1254,7 +1287,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s
|
||||
}
|
||||
|
||||
// TODO: If dimensions are the same, we can use glCopyImageSubData.
|
||||
void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) {
|
||||
void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip) {
|
||||
if (!dst->fbo) {
|
||||
ERROR_LOG_REPORT_ONCE(dstfbozero, SCEGE, "BlitFramebuffer_: dst->fbo == 0");
|
||||
fbo_unbind();
|
||||
@ -1300,6 +1333,11 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int
|
||||
int dstY2 = dst->renderHeight - (h + dstY) * dstYFactor;
|
||||
int dstY1 = dstY2 + h * dstYFactor;
|
||||
|
||||
if (flip) {
|
||||
dstY1 = dst->renderHeight - dstY1;
|
||||
dstY2 = dst->renderHeight - dstY2;
|
||||
}
|
||||
|
||||
#ifdef MAY_HAVE_GLES3
|
||||
fbo_bind_for_read(src->fbo);
|
||||
if (!useNV) {
|
||||
@ -1577,7 +1615,7 @@ void FramebufferManager::PackFramebufferAsync_(VirtualFramebuffer *vfb) {
|
||||
|
||||
#endif
|
||||
|
||||
void FramebufferManager::PackFramebufferSync_(VirtualFramebuffer *vfb) {
|
||||
void FramebufferManager::PackFramebufferSync_(VirtualFramebuffer *vfb, int x, int y, int w, int h) {
|
||||
if (vfb->fbo) {
|
||||
fbo_bind_for_read(vfb->fbo);
|
||||
} else {
|
||||
@ -1600,7 +1638,13 @@ void FramebufferManager::PackFramebufferSync_(VirtualFramebuffer *vfb) {
|
||||
if (!convert) {
|
||||
packed = (GLubyte *)Memory::GetPointer(fb_address);
|
||||
} else { // End result may be 16-bit but we are reading 32-bit, so there may not be enough space at fb_address
|
||||
packed = (GLubyte *)malloc(bufSize * sizeof(GLubyte));
|
||||
u32 neededSize = (u32)bufSize * sizeof(GLubyte);
|
||||
if (!convBuf_ || convBufSize_ < neededSize) {
|
||||
delete [] convBuf_;
|
||||
convBuf_ = new u8[neededSize];
|
||||
convBufSize_ = neededSize;
|
||||
}
|
||||
packed = convBuf_;
|
||||
}
|
||||
|
||||
if (packed) {
|
||||
@ -1614,12 +1658,12 @@ void FramebufferManager::PackFramebufferSync_(VirtualFramebuffer *vfb) {
|
||||
glfmt = GL_BGRA_EXT;
|
||||
}
|
||||
#endif
|
||||
glReadPixels(0, 0, vfb->fb_stride, vfb->height, glfmt, GL_UNSIGNED_BYTE, packed);
|
||||
int byteOffset = y * vfb->fb_stride * 4;
|
||||
glReadPixels(0, y, vfb->fb_stride, h, glfmt, GL_UNSIGNED_BYTE, packed + byteOffset);
|
||||
// LogReadPixelsError(glGetError());
|
||||
|
||||
if (convert) {
|
||||
ConvertFromRGBA8888(Memory::GetPointer(fb_address), packed, vfb->fb_stride, vfb->height, vfb->format);
|
||||
free(packed);
|
||||
ConvertFromRGBA8888(Memory::GetPointer(fb_address + byteOffset), packed + byteOffset, vfb->fb_stride, h, vfb->format);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1790,14 +1834,20 @@ void FramebufferManager::UpdateFromMemory(u32 addr, int size, bool safe) {
|
||||
}
|
||||
}
|
||||
|
||||
void FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size) {
|
||||
bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size) {
|
||||
if (!(g_Config.iRenderingMode == FB_BUFFERED_MODE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// MotoGP workaround
|
||||
for (size_t i = 0; i < vfbs_.size(); i++) {
|
||||
int bpp = vfbs_[i]->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
int fsize = vfbs_[i]->fb_stride * vfbs_[i]->height * (vfbs_[i]->format == GE_FORMAT_8888 ? 4 : 2);
|
||||
if ((vfbs_[i]->fb_address | 0x04000000) == src && size == fsize) {
|
||||
// A framebuffer matched!
|
||||
knownFramebufferRAMCopies_.insert(std::pair<u32, u32>(src, dst));
|
||||
if (Memory::IsVRAMAddress(src) && Memory::IsRAMAddress(dst)) {
|
||||
for (size_t i = 0; i < vfbs_.size(); i++) {
|
||||
int bpp = vfbs_[i]->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
int fsize = FramebufferByteSize(vfbs_[i]);
|
||||
if (MaskedEqual(vfbs_[i]->fb_address, src) && size == fsize) {
|
||||
// A framebuffer matched!
|
||||
knownFramebufferRAMCopies_.insert(std::pair<u32, u32>(src, dst));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1819,37 +1869,126 @@ void FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size) {
|
||||
if (srcBuffer == dstBuffer) {
|
||||
WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x", src, dst);
|
||||
} else {
|
||||
WARN_LOG_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x", src, dst);
|
||||
WARN_LOG_REPORT_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy (not supported) %08x -> %08x", src, dst);
|
||||
// Just do the blit!
|
||||
// TODO: Possibly take bpp into account somehow if games are doing really crazy things?
|
||||
// if (g_Config.bBlockTransferGPU) {
|
||||
// BlitFramebuffer_(dstBuffer, 0, 0, srcBuffer, 0, 0, srcBuffer->width, srcBuffer->height, 0);
|
||||
// }
|
||||
}
|
||||
Memory::Memcpy(dst, Memory::GetPointer(src), size);
|
||||
return true;
|
||||
} else if (dstBuffer) {
|
||||
WARN_LOG_REPORT_ONCE(btucpy, G3D, "Memcpy fbo upload (not supported) %08x -> %08x", src, dst);
|
||||
// Here we should just draw the pixels into the buffer.
|
||||
// if (g_Config.bBlockTransferGPU) {
|
||||
// }
|
||||
} else if (srcBuffer && g_Config.iRenderingMode == FB_BUFFERED_MODE) {
|
||||
WARN_LOG_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst);
|
||||
// if (g_Config.bBlockTransferGPU) {
|
||||
// ReadFramebufferToMemory(srcBuffer, true, 0, 0, srcBuffer->width, srcBuffer->height);
|
||||
// }
|
||||
WARN_LOG_REPORT_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x", src, dst);
|
||||
if (g_Config.bBlockTransferGPU) {
|
||||
const u8 *srcBase = Memory::GetPointerUnchecked(src);
|
||||
fbo_bind_as_render_target(dstBuffer->fbo);
|
||||
glViewport(0, 0, dstBuffer->renderWidth, dstBuffer->renderHeight);
|
||||
// TODO: Validate x/y/w/h based on size and offset?
|
||||
DrawPixels(dstBuffer, 0, 0, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstBuffer->height);
|
||||
dstBuffer->dirtyAfterDisplay = true;
|
||||
if ((gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
|
||||
dstBuffer->reallyDirtyAfterDisplay = true;
|
||||
if (currentRenderVfb_) {
|
||||
fbo_bind_as_render_target(currentRenderVfb_->fbo);
|
||||
} else {
|
||||
fbo_unbind();
|
||||
}
|
||||
glstate.viewport.restore();
|
||||
gstate_c.textureChanged = TEXCHANGE_PARAMSONLY;
|
||||
// This is a memcpy, let's still copy just in case.
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
} else if (srcBuffer) {
|
||||
WARN_LOG_REPORT_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst);
|
||||
if (g_Config.bBlockTransferGPU) {
|
||||
// TODO: Validate x/y/w/h based on size and offset?
|
||||
ReadFramebufferToMemory(srcBuffer, true, 0, 0, srcBuffer->width, srcBuffer->height);
|
||||
}
|
||||
return false;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool FramebufferManager::NotifyBlockTransfer(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) {
|
||||
// Returns the number of bytes covered by a virtual framebuffer in PSP memory:
// stride * height * bytes per pixel, where GE_FORMAT_8888 uses 4 bytes per pixel
// and every other format is treated as 2 bytes per pixel.
u32 FramebufferManager::FramebufferByteSize(const VirtualFramebuffer *vfb) const {
	const int bytesPerPixel = vfb->format == GE_FORMAT_8888 ? 4 : 2;
	return vfb->fb_stride * vfb->height * bytesPerPixel;
}
|
||||
|
||||
// Finds the virtual framebuffers whose backing memory contains the destination and
// source base pointers of a transfer.
//
// dstBuffer / srcBuffer: set to the matching framebuffer. They are left untouched when
//   nothing matches, so the caller must initialize them (e.g. to 0) before calling.
// dstY / srcY: when a match is found, advanced by the number of rows (computed from the
//   transfer's stride and bpp) between the framebuffer start and the base pointer.
// dstX / srcX: not modified here.
//
// If several framebuffers contain the same pointer, the one yielding the smallest row
// offset is chosen.
void FramebufferManager::FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int bpp) const {
	// Sentinel meaning "no framebuffer matched"; every real offset compares smaller.
	const u32 NO_MATCH = (u32)-1;
	u32 dstYOffset = NO_MATCH;
	u32 srcYOffset = NO_MATCH;

	for (size_t i = 0; i < vfbs_.size(); ++i) {
		VirtualFramebuffer *vfb = vfbs_[i];
		const u32 vfb_address = 0x04000000 | vfb->fb_address;
		const u32 vfb_size = FramebufferByteSize(vfb);

		if (vfb_address <= dstBasePtr && dstBasePtr < vfb_address + vfb_size) {
			const u32 yOffset = (dstBasePtr - vfb_address) / (dstStride * bpp);
			if (yOffset < dstYOffset) {
				dstYOffset = yOffset;
				dstBuffer = vfb;
			}
		}
		if (vfb_address <= srcBasePtr && srcBasePtr < vfb_address + vfb_size) {
			const u32 yOffset = (srcBasePtr - vfb_address) / (srcStride * bpp);
			if (yOffset < srcYOffset) {
				srcYOffset = yOffset;
				srcBuffer = vfb;
			}
		}
	}

	if (dstYOffset != NO_MATCH) {
		dstY += dstYOffset;
	}
	// Must mirror the destination check: only adjust srcY when a source framebuffer
	// actually matched. Comparing with >= would skip the adjustment on every match and
	// add the sentinel to srcY when nothing matched.
	if (srcYOffset != NO_MATCH) {
		srcY += srcYOffset;
	}
}
|
||||
|
||||
bool FramebufferManager::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) {
|
||||
if (!(g_Config.iRenderingMode == FB_BUFFERED_MODE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (Memory::IsRAMAddress(srcBasePtr) && Memory::IsVRAMAddress(dstBasePtr)) {
|
||||
// TODO: This causes glitches in Tactics Ogre if we don't implement both ways (which will probably be slow...)
|
||||
// The main thing this helps is videos, which will have a matching stride, and zero x/y.
|
||||
if (dstStride == srcStride && dstY == 0 && dstX == 0 && srcX == 0 && srcY == 0) {
|
||||
UpdateFromMemory(dstBasePtr, (dstY + height) * dstStride * bpp, true);
|
||||
|
||||
// Skip checking if there's no framebuffers in that area.
|
||||
if (!MayIntersectFramebuffer(srcBasePtr) && !MayIntersectFramebuffer(dstBasePtr)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
VirtualFramebuffer *dstBuffer = 0;
|
||||
VirtualFramebuffer *srcBuffer = 0;
|
||||
FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, bpp);
|
||||
|
||||
if (dstBuffer && srcBuffer) {
|
||||
if (srcBuffer == dstBuffer) {
|
||||
WARN_LOG_REPORT_ONCE(dstsrc, G3D, "Intra-buffer block transfer (not supported) %08x -> %08x", srcBasePtr, dstBasePtr);
|
||||
} else {
|
||||
WARN_LOG_ONCE(dstnotsrc, G3D, "Inter-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr);
|
||||
// Just do the blit!
|
||||
if (g_Config.bBlockTransferGPU) {
|
||||
BlitFramebuffer_(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, width, height, bpp);
|
||||
return true; // No need to actually do the memory copy behind, probably.
|
||||
}
|
||||
}
|
||||
return false;
|
||||
} else if (dstBuffer) {
|
||||
// Here we should just draw the pixels into the buffer. Copy first.
|
||||
return false;
|
||||
} else if (srcBuffer) {
|
||||
WARN_LOG_ONCE(btd, G3D, "Block transfer download %08x -> %08x", srcBasePtr, dstBasePtr);
|
||||
if (g_Config.bBlockTransferGPU && (srcBuffer == currentRenderVfb_ || !srcBuffer->memoryUpdated)) {
|
||||
ReadFramebufferToMemory(srcBuffer, true, srcX, srcY, width, height);
|
||||
}
|
||||
return false; // Let the bit copy happen
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void FramebufferManager::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) {
|
||||
if (!(g_Config.iRenderingMode == FB_BUFFERED_MODE)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to
|
||||
@ -1858,6 +1997,7 @@ bool FramebufferManager::NotifyBlockTransfer(u32 dstBasePtr, int dstStride, int
|
||||
u32 backBuffer = PrevDisplayFramebufAddr();
|
||||
u32 displayBuffer = DisplayFramebufAddr();
|
||||
|
||||
// TODO: Is this not handled by upload? Should we check !dstBuffer to avoid a double copy?
|
||||
if (((backBuffer != 0 && dstBasePtr == backBuffer) ||
|
||||
(displayBuffer != 0 && dstBasePtr == displayBuffer)) &&
|
||||
dstStride == 512 && height == 272) {
|
||||
@ -1865,51 +2005,32 @@ bool FramebufferManager::NotifyBlockTransfer(u32 dstBasePtr, int dstStride, int
|
||||
DrawFramebuffer(Memory::GetPointerUnchecked(dstBasePtr), GE_FORMAT_8888, 512, false);
|
||||
}
|
||||
|
||||
VirtualFramebuffer *dstBuffer = 0;
|
||||
VirtualFramebuffer *srcBuffer = 0;
|
||||
for (size_t i = 0; i < vfbs_.size(); ++i) {
|
||||
VirtualFramebuffer *vfb = vfbs_[i];
|
||||
const u32 vfb_address = 0x04000000 | vfb->fb_address;
|
||||
const u32 vfb_size = vfb->fb_stride * vfb->height * (vfb->format == GE_FORMAT_8888 ? 4 : 2);
|
||||
if (vfb_address <= dstBasePtr && dstBasePtr < vfb_address + vfb_size) {
|
||||
dstY += (dstBasePtr - vfb_address) / (dstStride * bpp);
|
||||
dstBuffer = vfb;
|
||||
}
|
||||
if (vfb_address <= srcBasePtr && srcBasePtr < vfb_address + vfb_size) {
|
||||
srcY += (srcBasePtr - vfb_address) / (srcStride * bpp);
|
||||
srcBuffer = vfb;
|
||||
}
|
||||
}
|
||||
if (MayIntersectFramebuffer(srcBasePtr) || MayIntersectFramebuffer(dstBasePtr)) {
|
||||
VirtualFramebuffer *dstBuffer = 0;
|
||||
VirtualFramebuffer *srcBuffer = 0;
|
||||
FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, bpp);
|
||||
|
||||
if (dstBuffer && srcBuffer) {
|
||||
if (srcBuffer == dstBuffer) {
|
||||
WARN_LOG_REPORT_ONCE(dstsrc, G3D, "Intra-buffer block transfer (not supported) %08x -> %08x", srcBasePtr, dstBasePtr);
|
||||
} else {
|
||||
WARN_LOG_ONCE(dstnotsrc, G3D, "Inter-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr);
|
||||
// Just do the blit!
|
||||
// TODO: Possibly take bpp into account somehow if games are doing really crazy things?
|
||||
if (dstBuffer && !srcBuffer) {
|
||||
WARN_LOG_REPORT_ONCE(btu, G3D, "Block transfer upload %08x -> %08x", srcBasePtr, dstBasePtr);
|
||||
if (g_Config.bBlockTransferGPU) {
|
||||
BlitFramebuffer_(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, width, height, bpp);
|
||||
return true; // No need to actually do the memory copy behind, probably.
|
||||
const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
|
||||
fbo_bind_as_render_target(dstBuffer->fbo);
|
||||
int dstBpp = dstBuffer->format == GE_FORMAT_8888 ? 4 : 2;
|
||||
float dstXFactor = (float)bpp / dstBpp;
|
||||
glViewport(0, 0, dstBuffer->renderWidth, dstBuffer->renderHeight);
|
||||
DrawPixels(dstBuffer, dstX * dstXFactor, dstY, srcBase, dstBuffer->format, srcStride * dstXFactor, width * dstXFactor, height);
|
||||
dstBuffer->dirtyAfterDisplay = true;
|
||||
if ((gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
|
||||
dstBuffer->reallyDirtyAfterDisplay = true;
|
||||
if (currentRenderVfb_) {
|
||||
fbo_bind_as_render_target(currentRenderVfb_->fbo);
|
||||
} else {
|
||||
fbo_unbind();
|
||||
}
|
||||
glstate.viewport.restore();
|
||||
gstate_c.textureChanged = TEXCHANGE_PARAMSONLY;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
} else if (dstBuffer) {
|
||||
WARN_LOG_REPORT_ONCE(btu, G3D, "Block transfer upload (not supported) %08x -> %08x", srcBasePtr, dstBasePtr);
|
||||
if (g_Config.bBlockTransferGPU) {
|
||||
u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
|
||||
DrawPixels(dstBuffer, dstX, dstY, srcBase, dstBuffer->format, srcStride * bpp, width, height);
|
||||
}
|
||||
// Here we should just draw the pixels into the buffer.
|
||||
return false;
|
||||
} else if (srcBuffer && g_Config.iRenderingMode == FB_BUFFERED_MODE) {
|
||||
WARN_LOG_ONCE(btd, G3D, "Block transfer download %08x -> %08x", srcBasePtr, dstBasePtr);
|
||||
if (g_Config.bBlockTransferGPU) {
|
||||
ReadFramebufferToMemory(srcBuffer, true, srcX, srcY, width, height);
|
||||
}
|
||||
return false; // Let the bit copy happen
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -166,7 +166,8 @@ public:
|
||||
// Returns true if it's sure this is a direct FBO->FBO transfer and it has already handled it.
|
||||
// In that case we hardly need to actually copy the bytes in VRAM, they will be wrong anyway (unless
|
||||
// read framebuffers is on, in which case this should always return false).
|
||||
bool NotifyBlockTransfer(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp);
|
||||
bool NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp);
|
||||
void NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp);
|
||||
|
||||
// Reads a rectangular subregion of a framebuffer to the right position in its backing memory.
|
||||
void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h);
|
||||
@ -199,7 +200,17 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void NotifyFramebufferCopy(u32 src, u32 dest, int size);
|
||||
// Cheap pre-check used to skip iterating the framebuffer list: returns true when the
// address (with the cache/kernel bits cleared) lies at or after PSP_GetVidMemBase()
// and before framebufRangeEnd_, i.e. inside the range that may contain FBOs.
bool MayIntersectFramebuffer(u32 start) {
	// Clear the cache/kernel bits.
	const u32 addr = start & 0x3FFFFFFF;
	// Most games only have two framebuffers at the start.
	return addr < framebufRangeEnd_ && addr >= PSP_GetVidMemBase();
}
|
||||
|
||||
bool NotifyFramebufferCopy(u32 src, u32 dest, int size);
|
||||
|
||||
void DestroyFramebuf(VirtualFramebuffer *vfb);
|
||||
|
||||
@ -211,6 +222,9 @@ private:
|
||||
void CompileDraw2DProgram();
|
||||
void DestroyDraw2DProgram();
|
||||
|
||||
void FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int bpp) const;
|
||||
u32 FramebufferByteSize(const VirtualFramebuffer *vfb) const;
|
||||
|
||||
void SetNumExtraFBOs(int num);
|
||||
|
||||
u32 displayFramebufPtr_;
|
||||
@ -227,11 +241,11 @@ private:
|
||||
VirtualFramebuffer *currentRenderVfb_;
|
||||
|
||||
// Used by ReadFramebufferToMemory and later framebuffer block copies
|
||||
void BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp);
|
||||
void BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, bool flip = false);
|
||||
#ifndef USING_GLES2
|
||||
void PackFramebufferAsync_(VirtualFramebuffer *vfb);
|
||||
#endif
|
||||
void PackFramebufferSync_(VirtualFramebuffer *vfb);
|
||||
void PackFramebufferSync_(VirtualFramebuffer *vfb, int x, int y, int w, int h);
|
||||
|
||||
// Used by DrawPixels
|
||||
unsigned int drawPixelsTex_;
|
||||
@ -239,7 +253,8 @@ private:
|
||||
int drawPixelsTexW_;
|
||||
int drawPixelsTexH_;
|
||||
|
||||
u8 *convBuf;
|
||||
u8 *convBuf_;
|
||||
u32 convBufSize_;
|
||||
GLSLProgram *draw2dprogram_;
|
||||
GLSLProgram *plainColorProgram_;
|
||||
GLSLProgram *postShaderProgram_;
|
||||
@ -257,7 +272,11 @@ private:
|
||||
bool resized_;
|
||||
bool useBufferedRendering_;
|
||||
bool updateVRAM_;
|
||||
|
||||
bool gameUsesSequentialCopies_;
|
||||
|
||||
// The range of PSP memory that may contain FBOs. So we can skip iterating.
|
||||
u32 framebufRangeEnd_;
|
||||
|
||||
std::vector<VirtualFramebuffer *> bvfbs_; // blitting FBOs
|
||||
std::map<std::pair<int, int>, FBO *> renderCopies_;
|
||||
|
||||
|
@ -661,6 +661,10 @@ void GLES_GPU::ProcessEvent(GPUEvent ev) {
|
||||
InvalidateCacheInternal(ev.invalidate_cache.addr, ev.invalidate_cache.size, ev.invalidate_cache.type);
|
||||
break;
|
||||
|
||||
case GPU_EVENT_FB_MEMCPY:
|
||||
UpdateMemoryInternal(ev.fb_memcpy.dst, ev.fb_memcpy.src, ev.fb_memcpy.size);
|
||||
break;
|
||||
|
||||
default:
|
||||
GPUCommon::ProcessEvent(ev);
|
||||
}
|
||||
@ -1908,30 +1912,31 @@ void GLES_GPU::DoBlockTransfer() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?)
|
||||
// Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them
|
||||
// entirely by walking a couple of pointers...
|
||||
if (srcStride == dstStride && width == srcStride) {
|
||||
// Common case in God of War, let's do it all in one chunk.
|
||||
u32 srcLineStartAddr = srcBasePtr + (srcY * srcStride + srcX) * bpp;
|
||||
u32 dstLineStartAddr = dstBasePtr + (dstY * dstStride + dstX) * bpp;
|
||||
const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr);
|
||||
u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr);
|
||||
memcpy(dst, src, width * height * bpp);
|
||||
} else {
|
||||
for (int y = 0; y < height; y++) {
|
||||
u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
|
||||
u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;
|
||||
|
||||
// Tell the framebuffer manager to take action if possible. If it does the entire thing, let's just return.
|
||||
if (!framebufferManager_.NotifyBlockTransferBefore(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp)) {
|
||||
// Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?)
|
||||
// Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them
|
||||
// entirely by walking a couple of pointers...
|
||||
if (srcStride == dstStride && width == srcStride) {
|
||||
// Common case in God of War, let's do it all in one chunk.
|
||||
u32 srcLineStartAddr = srcBasePtr + (srcY * srcStride + srcX) * bpp;
|
||||
u32 dstLineStartAddr = dstBasePtr + (dstY * dstStride + dstX) * bpp;
|
||||
const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr);
|
||||
u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr);
|
||||
memcpy(dst, src, width * bpp);
|
||||
}
|
||||
}
|
||||
memcpy(dst, src, width * height * bpp);
|
||||
} else {
|
||||
for (int y = 0; y < height; y++) {
|
||||
u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
|
||||
u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;
|
||||
|
||||
const u8 *src = Memory::GetPointerUnchecked(srcLineStartAddr);
|
||||
u8 *dst = Memory::GetPointerUnchecked(dstLineStartAddr);
|
||||
memcpy(dst, src, width * bpp);
|
||||
}
|
||||
}
|
||||
|
||||
// Tell the framebuffer manager to take action if possible. If it does the entire thing, let's just return.
|
||||
if (!framebufferManager_.NotifyBlockTransfer(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp)) {
|
||||
textureCache_.Invalidate(dstBasePtr + (dstY * dstStride + dstX) * bpp, height * dstStride * bpp, GPU_INVALIDATE_HINT);
|
||||
framebufferManager_.NotifyBlockTransferAfter(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp);
|
||||
}
|
||||
|
||||
#ifndef MOBILE_DEVICE
|
||||
@ -1954,19 +1959,46 @@ void GLES_GPU::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType t
|
||||
else
|
||||
textureCache_.InvalidateAll(type);
|
||||
|
||||
if (type != GPU_INVALIDATE_ALL)
|
||||
framebufferManager_.UpdateFromMemory(addr, size, type == GPU_INVALIDATE_SAFE);
|
||||
if (type != GPU_INVALIDATE_ALL && framebufferManager_.MayIntersectFramebuffer(addr)) {
|
||||
// If we're doing block transfers, we shouldn't need this, and it'll only confuse us.
|
||||
// Vempire invalidates (with writeback) after drawing, but before blitting.
|
||||
if (!g_Config.bBlockTransferGPU || type == GPU_INVALIDATE_SAFE) {
|
||||
framebufferManager_.UpdateFromMemory(addr, size, type == GPU_INVALIDATE_SAFE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GLES_GPU::UpdateMemory(u32 dest, u32 src, int size) {
|
||||
InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
|
||||
|
||||
// Track stray copies of a framebuffer in RAM. MotoGP does this.
|
||||
if (Memory::IsVRAMAddress(src) && Memory::IsRAMAddress(dest)) {
|
||||
framebufferManager_.NotifyFramebufferCopy(src, dest, size);
|
||||
// Carries out a framebuffer-related memory copy. Reached either directly from
// UpdateMemory() or through a GPU_EVENT_FB_MEMCPY event in ProcessEvent().
//
// dest, src: emulated addresses of the copy destination / source.
// size:      number of bytes to copy.
void GLES_GPU::UpdateMemoryInternal(u32 dest, u32 src, int size) {
	// Offer the copy to the framebuffer manager first. Only when it returns false
	// do we move the bytes in emulated memory ourselves.
	// NOTE(review): presumably a true result means the manager performed the copy - confirm.
	if (!framebufferManager_.NotifyFramebufferCopy(src, dest, size)) {
		Memory::Memcpy(dest, Memory::GetPointer(src), size);
	}

	// In both cases the destination range is invalidated (as a hint) afterwards.
	InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
}
|
||||
|
||||
// Notifies the GPU about a memory copy of `size` bytes from `src` to `dest`.
//
// Returns true when either address may intersect a framebuffer; in that case the
// copy has been carried out through UpdateMemoryInternal() before returning.
// Returns false when neither address may intersect a framebuffer; then only the
// destination range is invalidated and no copy is done here.
bool GLES_GPU::UpdateMemory(u32 dest, u32 src, int size) {
	// Nothing framebuffer-related: just hint the cache invalidation and report "not handled".
	if (!framebufferManager_.MayIntersectFramebuffer(src) && !framebufferManager_.MayIntersectFramebuffer(dest)) {
		InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
		return false;
	}

	// Track stray copies of a framebuffer in RAM. MotoGP does this.
	if (!IsOnSeparateCPUThread()) {
		UpdateMemoryInternal(dest, src, size);
		return true;
	}

	// Hand the copy over as an event instead of running it on this thread.
	GPUEvent copyEvent(GPU_EVENT_FB_MEMCPY);
	copyEvent.fb_memcpy.dst = dest;
	copyEvent.fb_memcpy.src = src;
	copyEvent.fb_memcpy.size = size;
	ScheduleEvent(copyEvent);

	// This is a memcpy, so we need to wait for it to complete.
	SyncThread();
	return true;
}
|
||||
|
||||
// Forwards the request to the texture cache's ClearNextFrame().
void GLES_GPU::ClearCacheNextFrame() {
	textureCache_.ClearNextFrame();
}
|
||||
|
@ -44,7 +44,7 @@ public:
|
||||
virtual void BeginFrame();
|
||||
virtual void UpdateStats();
|
||||
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
|
||||
virtual void UpdateMemory(u32 dest, u32 src, int size);
|
||||
virtual bool UpdateMemory(u32 dest, u32 src, int size);
|
||||
virtual void ClearCacheNextFrame();
|
||||
virtual void DeviceLost(); // Only happens on Android. Drop all textures and shaders.
|
||||
|
||||
@ -151,6 +151,7 @@ private:
|
||||
void InitClearInternal();
|
||||
void BeginFrameInternal();
|
||||
void CopyDisplayToOutputInternal();
|
||||
void UpdateMemoryInternal(u32 dest, u32 src, int size);
|
||||
void InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type);
|
||||
|
||||
static CommandInfo cmdInfo_[256];
|
||||
|
@ -163,6 +163,7 @@ enum GPUEventType {
|
||||
GPU_EVENT_INVALIDATE_CACHE,
|
||||
GPU_EVENT_FINISH_EVENT_LOOP,
|
||||
GPU_EVENT_SYNC_THREAD,
|
||||
GPU_EVENT_FB_MEMCPY,
|
||||
};
|
||||
|
||||
struct GPUEvent {
|
||||
@ -175,6 +176,12 @@ struct GPUEvent {
|
||||
int size;
|
||||
GPUInvalidationType type;
|
||||
} invalidate_cache;
|
||||
// GPU_EVENT_FB_MEMCPY
|
||||
struct {
|
||||
u32 dst;
|
||||
u32 src;
|
||||
int size;
|
||||
} fb_memcpy;
|
||||
};
|
||||
|
||||
operator GPUEventType() const {
|
||||
@ -227,7 +234,7 @@ public:
|
||||
// If size = -1, invalidate everything.
|
||||
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type) = 0;
|
||||
// Update either RAM from VRAM, or VRAM from RAM... or even VRAM from VRAM.
|
||||
virtual void UpdateMemory(u32 dest, u32 src, int size) = 0;
|
||||
virtual bool UpdateMemory(u32 dest, u32 src, int size) = 0;
|
||||
|
||||
// Will cause the texture cache to be cleared at the start of the next frame.
|
||||
virtual void ClearCacheNextFrame() = 0;
|
||||
|
@ -657,7 +657,8 @@ void NullGPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type) {
|
||||
// Nothing to invalidate.
|
||||
}
|
||||
|
||||
void NullGPU::UpdateMemory(u32 dest, u32 src, int size) {
|
||||
// Null backend version of UpdateMemory: there is no GPU-side state to copy, so
// it only forwards a hint invalidation for the destination range.
// Always returns false, i.e. the copy itself was not performed here.
bool NullGPU::UpdateMemory(u32 dest, u32 src, int size) {
	InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
	return false;
}
|
||||
|
@ -34,7 +34,7 @@ public:
|
||||
virtual void CopyDisplayToOutput() {}
|
||||
virtual void UpdateStats();
|
||||
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
|
||||
virtual void UpdateMemory(u32 dest, u32 src, int size);
|
||||
virtual bool UpdateMemory(u32 dest, u32 src, int size);
|
||||
virtual void ClearCacheNextFrame() {};
|
||||
|
||||
virtual void DeviceLost() {}
|
||||
|
@ -851,12 +851,13 @@ void SoftGPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type)
|
||||
// Nothing to invalidate.
|
||||
}
|
||||
|
||||
void SoftGPU::UpdateMemory(u32 dest, u32 src, int size)
|
||||
// Software renderer version of UpdateMemory. It performs no copy of its own and
// always returns false.
bool SoftGPU::UpdateMemory(u32 dest, u32 src, int size)
{
	InvalidateCache(dest, size, GPU_INVALIDATE_HINT);

	// The copy may have touched the displayed framebuffer, so conservatively
	// flag it as dirty.
	framebufferDirty_ = true;

	return false;
}
|
||||
|
||||
bool SoftGPU::FramebufferDirty() {
|
||||
|
@ -59,7 +59,7 @@ public:
|
||||
virtual void CopyDisplayToOutput();
|
||||
virtual void UpdateStats();
|
||||
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
|
||||
virtual void UpdateMemory(u32 dest, u32 src, int size);
|
||||
virtual bool UpdateMemory(u32 dest, u32 src, int size);
|
||||
virtual void ClearCacheNextFrame() {};
|
||||
|
||||
virtual void DeviceLost() {}
|
||||
|
Loading…
Reference in New Issue
Block a user