diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 4fa0d8fab5..ccb68857f2 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -20,6 +20,7 @@ #include "Core/MemMap.h" #include "Core/Config.h" #include "Core/System.h" +#include "Core/Reporting.h" #include "GPU/ge_constants.h" #include "GPU/GPUState.h" @@ -34,653 +35,695 @@ namespace DX9 { -// Aggressively delete unused FBO:s to save gpu memory. -enum { - FBO_OLD_AGE = 5, -}; + // Aggressively delete unused FBO:s to save gpu memory. + enum { + FBO_OLD_AGE = 5, + }; -static bool MaskedEqual(u32 addr1, u32 addr2) { - return (addr1 & 0x03FFFFFF) == (addr2 & 0x03FFFFFF); -} - -inline u16 RGBA8888toRGB565(u32 px) { - return ((px >> 3) & 0x001F) | ((px >> 5) & 0x07E0) | ((px >> 8) & 0xF800); -} - -inline u16 RGBA8888toRGBA4444(u32 px) { - return ((px >> 4) & 0x000F) | ((px >> 8) & 0x00F0) | ((px >> 12) & 0x0F00) | ((px >> 16) & 0xF000); -} - -inline u16 RGBA8888toRGBA5551(u32 px) { - return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000); -} - -static void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format); - -void CenterRect(float *x, float *y, float *w, float *h, - float origW, float origH, float frameW, float frameH) -{ - if (g_Config.bStretchToDisplay) - { - *x = 0; - *y = 0; - *w = frameW; - *h = frameH; - return; + static bool MaskedEqual(u32 addr1, u32 addr2) { + return (addr1 & 0x03FFFFFF) == (addr2 & 0x03FFFFFF); } - float origRatio = origW/origH; - float frameRatio = frameW/frameH; - - if (origRatio > frameRatio) - { - // Image is wider than frame. Center vertically. - float scale = origW / frameW; - *x = 0.0f; - *w = frameW; - *h = frameW / origRatio; - // Stretch a little bit - if (g_Config.bPartialStretch) - *h = (frameH + *h) / 2.0f; // (408 + 720) / 2 = 564 - *y = (frameH - *h) / 2.0f; + inline u16 RGBA8888toRGB565(u32 px) { + return ((px >> 3) & 0x001F) | ((px >> 5) & 0x07E0) | ((px >> 8) & 0xF800); } - else - { - // Image is taller than frame. Center horizontally. - float scale = origH / frameH; - *y = 0.0f; - *h = frameH; - *w = frameH * origRatio; - *x = (frameW - *w) / 2.0f; + + inline u16 RGBA8888toRGBA4444(u32 px) { + return ((px >> 4) & 0x000F) | ((px >> 8) & 0x00F0) | ((px >> 12) & 0x0F00) | ((px >> 16) & 0xF000); } -} -static void ClearBuffer() { - dxstate.depthWrite.set(true); - dxstate.colorMask.set(true, true, true, true); - pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 1, 0); -} + inline u16 RGBA8888toRGBA5551(u32 px) { + return ((px >> 3) & 0x001F) | ((px >> 6) & 0x03E0) | ((px >> 9) & 0x7C00) | ((px >> 16) & 0x8000); + } -static void DisableState() { - dxstate.blend.disable(); - dxstate.cullMode.set(false, false); - dxstate.depthTest.disable(); - dxstate.scissorTest.disable(); - dxstate.stencilTest.disable(); -} + static void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format); + + void CenterRect(float *x, float *y, float *w, float *h, + float origW, float origH, float frameW, float frameH) { + if (g_Config.bStretchToDisplay) { + *x = 0; + *y = 0; + *w = frameW; + *h = frameH; + return; + } + + float origRatio = origW/origH; + float frameRatio = frameW/frameH; + + if (origRatio > frameRatio) { + // Image is wider than frame. Center vertically. + float scale = origW / frameW; + *x = 0.0f; + *w = frameW; + *h = frameW / origRatio; + // Stretch a little bit + if (g_Config.bPartialStretch) + *h = (frameH + *h) / 2.0f; // (408 + 720) / 2 = 564 + *y = (frameH - *h) / 2.0f; + } else { + // Image is taller than frame. Center horizontally. + float scale = origH / frameH; + *y = 0.0f; + *h = frameH; + *w = frameH * origRatio; + *x = (frameW - *w) / 2.0f; + } + } + + static void ClearBuffer() { + dxstate.depthWrite.set(true); + dxstate.colorMask.set(true, true, true, true); + pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 0, 0); + } + + static void DisableState() { + dxstate.blend.disable(); + dxstate.cullMode.set(false, false); + dxstate.depthTest.disable(); + dxstate.scissorTest.disable(); + dxstate.stencilTest.disable(); + } -FramebufferManagerDX9::FramebufferManagerDX9() : + FramebufferManagerDX9::FramebufferManagerDX9() : displayFramebufPtr_(0), - displayStride_(0), - displayFormat_(GE_FORMAT_565), - displayFramebuf_(0), - prevDisplayFramebuf_(0), - prevPrevDisplayFramebuf_(0), - frameLastFramebufUsed(0), - currentRenderVfb_(0), - drawPixelsTex_(0), - drawPixelsTexFormat_(GE_FORMAT_INVALID), - convBuf(0) -{ - // And an initial clear. We don't clear per frame as the games are supposed to handle that - // by themselves. - ClearBuffer(); - -#ifdef _XBOX - pD3Ddevice->CreateTexture(512, 272, 1, 0, D3DFMT(D3DFMT_A8R8G8B8), NULL, &drawPixelsTex_, NULL); -#else - pD3Ddevice->CreateTexture(512, 272, 1, 0, D3DFMT(D3DFMT_A8R8G8B8), D3DPOOL_MANAGED, &drawPixelsTex_, NULL); -#endif - useBufferedRendering_ = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; -} - -FramebufferManagerDX9::~FramebufferManagerDX9() { - if(drawPixelsTex_) { - drawPixelsTex_->Release(); - } - delete [] convBuf; -} - -static inline void ARGB8From4444(u16 c, u32 * dst) { - *dst = ((c & 0xf) << 4) | (((c >> 4) & 0xf) << 12) | (((c >> 8) & 0xf) << 20) | ((c >> 12) << 28); -} -static inline void ARGB8From565(u16 c, u32 * dst) { - *dst = ((c & 0x001f) << 19) | (((c >> 5) & 0x003f) << 11) | ((((c >> 10) & 0x001f) << 3)) | 0xFF000000; -} -static inline void ARGB8From5551(u16 c, u32 * dst) { - *dst = ((c & 0x001f) << 19) | (((c >> 5) & 0x001f) << 11) | ((((c >> 10) & 0x001f) << 3)) | 0xFF000000; -} - -static inline u32 ABGR2RGBA(u32 src) { - return (src >> 8) | (src << 24); -} - -void FramebufferManagerDX9::DrawPixels(const u8 *framebuf, GEBufferFormat pixelFormat, int linesize) { - u8 * convBuf = NULL; - D3DLOCKED_RECT rect; - - drawPixelsTex_->LockRect(0, &rect, NULL, D3DLOCK_NOOVERWRITE); - - convBuf = (u8*)rect.pBits; - - // Final format is ARGB(directx) - - // TODO: We can just change the texture format and flip some bits around instead of this. - if (pixelFormat != GE_FORMAT_8888 || linesize != 512) { - for (int y = 0; y < 272; y++) { - switch (pixelFormat) { - // not tested - case GE_FORMAT_565: - { - const u16 *src = (const u16 *)framebuf + linesize * y; - u32 *dst = (u32*)(convBuf + rect.Pitch * y); - for (int x = 0; x < 480; x++) { - u16_le col0 = src[x+0]; - ARGB8From565(col0, &dst[x + 0]); - } - } - break; - // faster - case GE_FORMAT_5551: - { - const u16 *src = (const u16 *)framebuf + linesize * y; - u32 *dst = (u32*)(convBuf + rect.Pitch * y); - for (int x = 0; x < 480; x++) { - u16_le col0 = src[x+0]; - ARGB8From5551(col0, &dst[x + 0]); - } - } - break; - // not tested - case GE_FORMAT_4444: - { - const u16 *src = (const u16 *)framebuf + linesize * y; - u32 *dst = (u32*)(convBuf + rect.Pitch * y); - for (int x = 0; x < 480; x++) - { - u16_le col = src[x]; - dst[x * 4 + 0] = (col >> 12) << 4; - dst[x * 4 + 1] = ((col >> 8) & 0xf) << 4; - dst[x * 4 + 2] = ((col >> 4) & 0xf) << 4; - dst[x * 4 + 3] = (col & 0xf) << 4; - } - } - break; - - case GE_FORMAT_8888: - { - const u32 *src = (const u32 *)framebuf + linesize * y; - u32 *dst = (u32*)(convBuf + rect.Pitch * y); - for (int x = 0; x < 480; x++) - { - dst[x] = ABGR2RGBA(src[x]); - } - } - break; - } - } - } else { - for (int y = 0; y < 272; y++) { - const u32 *src = (const u32 *)framebuf + linesize * y; - u32 *dst = (u32*)(convBuf + rect.Pitch * y); - for (int x = 0; x < 512; x++) - { - dst[x] = ABGR2RGBA(src[x]); - } - } - } - - drawPixelsTex_->UnlockRect(0); - // D3DXSaveTextureToFile("game:\\cc.png", D3DXIFF_PNG, drawPixelsTex_, NULL); - - pD3Ddevice->SetTexture(0, drawPixelsTex_); - - float x, y, w, h; - CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight); - DrawActiveTexture(x, y, w, h, false, 480.0f / 512.0f); -} - - -void FramebufferManagerDX9::DrawActiveTexture(float x, float y, float w, float h, bool flip, float uscale, float vscale) { - float u2 = uscale; - // Since we're flipping, 0 is down. That's where the scale goes. - float v1 = flip ? 1.0f : 1.0f - vscale; - float v2 = flip ? 1.0f - vscale : 1.0f; - - const float coord[] = { - x, y, 0, 0, v1, - x+w, y, 0, u2, v1, - x+w, y+h, 0, u2, v2, - x, y+h, 0, 0, v2 - }; - - Matrix4x4 ortho; - - ortho.setOrtho(0, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, 0, -1, 1); - ConvertProjMatrixToD3D(ortho); - - //pD3Ddevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_WIREFRAME); - pD3Ddevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE); - pD3Ddevice->SetVertexShaderConstantF(0, ortho.getReadPtr(), 4); - - pD3Ddevice->SetVertexDeclaration(pFramebufferVertexDecl); - pD3Ddevice->SetPixelShader(pFramebufferPixelShader); - pD3Ddevice->SetVertexShader(pFramebufferVertexShader); - //pD3Ddevice->SetTexture(0, drawPixelsTex_); - pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coord, 5 * sizeof(float)); -} - -VirtualFramebufferDX9 *FramebufferManagerDX9::GetDisplayFBO() { - VirtualFramebufferDX9 *match = NULL; - for (size_t i = 0; i < vfbs_.size(); ++i) { - VirtualFramebufferDX9 *v = vfbs_[i]; - if (MaskedEqual(v->fb_address, displayFramebufPtr_) && v->format == displayFormat_ && v->width >= 480) { - // Could check w too but whatever - if (match == NULL || match->last_frame_render < v->last_frame_render) { - match = v; - } - } - } - if (match != NULL) { - return match; - } - - DEBUG_LOG(SCEGE, "Finding no FBO matching address %08x", displayFramebufPtr_); -#if 0 // defined(_DEBUG) - std::string debug = "FBOs: "; - for (size_t i = 0; i < vfbs_.size(); ++i) { - char temp[256]; - sprintf(temp, "%08x %i %i", vfbs_[i]->fb_address, vfbs_[i]->width, vfbs_[i]->height); - debug += std::string(temp); - } - ERROR_LOG(SCEGE, "FBOs: %s", debug.c_str()); -#endif - return 0; -} - -// Heuristics to figure out the size of FBO to create. -static void DrawingSize(int &drawing_width, int &drawing_height) { - int default_width = 480; - int default_height = 272; - int viewport_width = (int) gstate.getViewportX1(); - int viewport_height = (int) gstate.getViewportY1(); - int region_width = gstate.getRegionX2() + 1; - int region_height = gstate.getRegionY2() + 1; - int scissor_width = gstate.getScissorX2() + 1; - int scissor_height = gstate.getScissorY2() + 1; - int fb_width = gstate.fbwidth & 0x3C0; - - DEBUG_LOG(SCEGE,"viewport : %ix%i, region : %ix%i , scissor: %ix%i, stride: %i, %i", viewport_width,viewport_height, region_width, region_height, scissor_width, scissor_height, fb_width, gstate.isModeThrough()); - - // Viewport may return 0x0 for example FF Type-0 and we set it to 480x272 - if (viewport_width <= 1 && viewport_height <=1) { - viewport_width = default_width; - viewport_height = default_height; - } - - if (fb_width > 0 && fb_width < 512) { - // Correct scissor size has to be used to render like character shadow in Mortal Kombat . - if (fb_width == scissor_width && region_width != scissor_width) { - drawing_width = scissor_width; - drawing_height = scissor_height; - } else { - drawing_width = viewport_width; - drawing_height = viewport_height; - } - } else { - // Correct region size has to be used when fb_width equals to region_width for exmaple GTA/Midnight Club/MSG Peace Maker . - if (fb_width == region_width && region_width != scissor_width) { - drawing_width = region_width; - drawing_height = region_height; - } else { - drawing_width = default_width; - drawing_height = default_height; - } - } -} - -void FramebufferManagerDX9::DestroyFramebuf(VirtualFramebufferDX9 *v) { - textureCache_->NotifyFramebuffer(v->fb_address, v, NOTIFY_FB_DESTROYED); - if (v->fbo) { - fbo_destroy(v->fbo); - v->fbo = 0; - } - - // Wipe some pointers - if (currentRenderVfb_ == v) - currentRenderVfb_ = 0; - if (displayFramebuf_ == v) - displayFramebuf_ = 0; - if (prevDisplayFramebuf_ == v) - prevDisplayFramebuf_ = 0; - if (prevPrevDisplayFramebuf_ == v) - prevPrevDisplayFramebuf_ = 0; - - delete v; -} - -void FramebufferManagerDX9::SetRenderFrameBuffer() { - if (!gstate_c.framebufChanged && currentRenderVfb_) { - currentRenderVfb_->last_frame_render = gpuStats.numFlips; - currentRenderVfb_->dirtyAfterDisplay = true; - if (!gstate_c.skipDrawReason) - currentRenderVfb_->reallyDirtyAfterDisplay = true; - return; - } - gstate_c.framebufChanged = false; - - // Get parameters - u32 fb_address = gstate.getFrameBufRawAddress(); - int fb_stride = gstate.fbwidth & 0x3C0; - - u32 z_address = gstate.getDepthBufRawAddress(); - int z_stride = gstate.zbwidth & 0x3C0; - - // Yeah this is not completely right. but it'll do for now. - //int drawing_width = ((gstate.region2) & 0x3FF) + 1; - //int drawing_height = ((gstate.region2 >> 10) & 0x3FF) + 1; - - // As there are no clear "framebuffer width" and "framebuffer height" registers, - // we need to infer the size of the current framebuffer somehow. Let's try the viewport. - - GEBufferFormat fmt = gstate.FrameBufFormat(); - - int drawing_width, drawing_height; - DrawingSize(drawing_width, drawing_height); - - int buffer_width = drawing_width; - int buffer_height = drawing_height; - - // Find a matching framebuffer - VirtualFramebufferDX9 *vfb = 0; - for (size_t i = 0; i < vfbs_.size(); ++i) { - VirtualFramebufferDX9 *v = vfbs_[i]; - if (MaskedEqual(v->fb_address, fb_address) && v->format == fmt) { - // Let's not be so picky for now. Let's say this is the one. - vfb = v; - // Update fb stride in case it changed - vfb->fb_stride = fb_stride; - vfb->format = fmt; - if (v->bufferWidth >= drawing_width && v->bufferHeight >= drawing_height) { - v->width = drawing_width; - v->height = drawing_height; - } - break; - } - } - - float renderWidthFactor = (float)PSP_CoreParameter().renderWidth / 480.0f; - float renderHeightFactor = (float)PSP_CoreParameter().renderHeight / 272.0f; - - // None found? Create one. - if (!vfb) { - gstate_c.textureChanged = TEXCHANGE_UPDATED; - vfb = new VirtualFramebufferDX9(); - vfb->fbo = 0; - vfb->fb_address = fb_address; - vfb->fb_stride = fb_stride; - vfb->z_address = z_address; - vfb->z_stride = z_stride; - vfb->width = drawing_width; - vfb->height = drawing_height; - vfb->renderWidth = (u16)(drawing_width * renderWidthFactor); - vfb->renderHeight = (u16)(drawing_height * renderHeightFactor); - vfb->bufferWidth = buffer_width; - vfb->bufferHeight = buffer_height; - vfb->format = fmt; - vfb->usageFlags = FB_USAGE_RENDERTARGET; - vfb->dirtyAfterDisplay = true; - if ((gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0) - vfb->reallyDirtyAfterDisplay = true; - vfb->memoryUpdated = false; - - if (g_Config.bTrueColor) { - vfb->colorDepth = FBO_8888; - } else { - switch (fmt) { - case GE_FORMAT_4444: - vfb->colorDepth = FBO_4444; - break; - case GE_FORMAT_5551: - vfb->colorDepth = FBO_5551; - break; - case GE_FORMAT_565: - vfb->colorDepth = FBO_565; - break; - case GE_FORMAT_8888: - vfb->colorDepth = FBO_8888; - break; - default: - vfb->colorDepth = FBO_8888; - break; - } - } - - //#ifdef ANDROID - // vfb->colorDepth = FBO_8888; - //#endif - - if (useBufferedRendering_) { - vfb->fbo = fbo_create(vfb->renderWidth, vfb->renderHeight, 1, true, vfb->colorDepth); - if (vfb->fbo) { - fbo_bind_as_render_target(vfb->fbo); - } else { - ERROR_LOG(SCEGE, "Error creating FBO! %i x %i", vfb->renderWidth, vfb->renderHeight); - } - } else { - fbo_unbind(); - // Let's ignore rendering to targets that have not (yet) been displayed. - gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB; - } - - textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_CREATED); - - vfb->last_frame_render = gpuStats.numFlips; - frameLastFramebufUsed = gpuStats.numFlips; - vfbs_.push_back(vfb); - ClearBuffer(); - - currentRenderVfb_ = vfb; - - INFO_LOG(SCEGE, "Creating FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->format); - - // We already have it! - } else if (vfb != currentRenderVfb_) { - // TODO: This doesn't make sense for DirectX? -#ifndef USING_GLES2 - bool useMem = g_Config.iRenderingMode == FB_READFBOMEMORY_GPU || g_Config.iRenderingMode == FB_READFBOMEMORY_CPU; -#else - bool useMem = g_Config.iRenderingMode == FB_READFBOMEMORY_GPU; -#endif - if(useMem && !vfb->memoryUpdated) { - ReadFramebufferToMemory(vfb, true); - } - // Use it as a render target. - DEBUG_LOG(SCEGE, "Switching render target to FBO for %08x: %i x %i x %i ", vfb->fb_address, vfb->width, vfb->height, vfb->format); - vfb->usageFlags |= FB_USAGE_RENDERTARGET; - gstate_c.textureChanged = TEXCHANGE_UPDATED; - vfb->last_frame_render = gpuStats.numFlips; - frameLastFramebufUsed = gpuStats.numFlips; - vfb->dirtyAfterDisplay = true; - if ((gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0) - vfb->reallyDirtyAfterDisplay = true; - vfb->memoryUpdated = false; - - if (useBufferedRendering_) { - if (vfb->fbo) { - fbo_bind_as_render_target(vfb->fbo); - } else { - // wtf? This should only happen very briefly when toggling bBufferedRendering - fbo_unbind(); - } - } else { - if (vfb->fbo) { - // wtf? This should only happen very briefly when toggling bBufferedRendering - textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_DESTROYED); - fbo_destroy(vfb->fbo); - vfb->fbo = 0; - } - fbo_unbind(); - - // Let's ignore rendering to targets that have not (yet) been displayed. - if (vfb->usageFlags & FB_USAGE_DISPLAYED_FRAMEBUFFER) - gstate_c.skipDrawReason &= ~SKIPDRAW_NON_DISPLAYED_FB; - else - gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB; - - /* - if (drawing_width == 480 && drawing_height == 272) { - gstate_c.skipDrawReason &= ~SKIPDRAW_SKIPNONFB; - // OK! - } else { - gstate_c.skipDrawReason |= ~SKIPDRAW_SKIPNONFB; - }*/ - } - textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED); - -#if 0 - // Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering - // to it. This broke stuff before, so now it only clears on the first use of an - // FBO in a frame. This means that some games won't be able to avoid the on-some-GPUs - // performance-crushing framebuffer reloads from RAM, but we'll have to live with that. - if (vfb->last_frame_render != gpuStats.numFlips) { - ClearBuffer(); - } -#endif - currentRenderVfb_ = vfb; - } else { - vfb->last_frame_render = gpuStats.numFlips; - frameLastFramebufUsed = gpuStats.numFlips; - vfb->dirtyAfterDisplay = true; - if ((gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0) - vfb->reallyDirtyAfterDisplay = true; - } - - // ugly... - if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { - shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); - gstate_c.curRTWidth = vfb->width; - gstate_c.curRTHeight = vfb->height; - } -} - -void FramebufferManagerDX9::CopyDisplayToOutput() { - -#ifdef _XBOX - //if (currentRenderVfb_ && (!currentRenderVfb_->usageFlags & FB_USAGE_DISPLAYED_FRAMEBUFFER)) - //if (currentRenderVfb_ && (currentRenderVfb_->usageFlags == FB_USAGE_RENDERTARGET)) - //if (currentRenderVfb_ && (currentRenderVfb_->fb_address == 0)) - - if (currentRenderVfb_) + displayStride_(0), + displayFormat_(GE_FORMAT_565), + displayFramebuf_(0), + prevDisplayFramebuf_(0), + prevPrevDisplayFramebuf_(0), + frameLastFramebufUsed(0), + currentRenderVfb_(0), + drawPixelsTex_(0), + drawPixelsTexFormat_(GE_FORMAT_INVALID), + convBuf(0) { - if (currentRenderVfb_->fbo && currentRenderVfb_->usageFlags == FB_USAGE_RENDERTARGET) { - fbo_resolve(currentRenderVfb_->fbo); - } - } - -#endif - - - fbo_unbind(); - - currentRenderVfb_ = 0; - - VirtualFramebufferDX9 *vfb = GetDisplayFBO(); - if (!vfb) { - if (Memory::IsValidAddress(ramDisplayFramebufPtr_)) { - // The game is displaying something directly from RAM. In GTA, it's decoded video. - DrawPixels(Memory::GetPointer(ramDisplayFramebufPtr_), displayFormat_, displayStride_); - } else if (Memory::IsValidAddress(displayFramebufPtr_)) { - // The game is displaying something directly from RAM. In GTA, it's decoded video. - DrawPixels(Memory::GetPointer(displayFramebufPtr_), displayFormat_, displayStride_); - } else { - DEBUG_LOG(SCEGE, "Found no FBO to display! displayFBPtr = %08x", displayFramebufPtr_); - // No framebuffer to display! Clear to black. - ClearBuffer(); - } - return; - } - - vfb->usageFlags |= FB_USAGE_DISPLAYED_FRAMEBUFFER; - vfb->dirtyAfterDisplay = false; - vfb->reallyDirtyAfterDisplay = false; - - if (prevDisplayFramebuf_ != displayFramebuf_) { - prevPrevDisplayFramebuf_ = prevDisplayFramebuf_; - } - if (displayFramebuf_ != vfb) { - prevDisplayFramebuf_ = displayFramebuf_; - } - displayFramebuf_ = vfb; - - if (resized_) { + // And an initial clear. We don't clear per frame as the games are supposed to handle that + // by themselves. ClearBuffer(); + pD3Ddevice->CreateTexture(512, 272, 1, 0, D3DFMT(D3DFMT_A8R8G8B8), D3DPOOL_MANAGED, &drawPixelsTex_, NULL); + useBufferedRendering_ = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; } - if (vfb->fbo) { - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); - DEBUG_LOG(SCEGE, "Displaying FBO %08x", vfb->fb_address); - DisableState(); - - fbo_bind_color_as_texture(vfb->fbo, 0); - - // These are in the output display coordinates - float x, y, w, h; - CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight); - DrawActiveTexture(x, y, w, h, false, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height); - pD3Ddevice->SetTexture(0, NULL); - } -} - -void FramebufferManagerDX9::ReadFramebufferToMemory(VirtualFramebufferDX9 *vfb, bool sync) { - // This only works with buffered rendering - if (!useBufferedRendering_) { - return; + FramebufferManagerDX9::~FramebufferManagerDX9() { + if(drawPixelsTex_) { + drawPixelsTex_->Release(); + } + delete [] convBuf; } + static inline void ARGB8From4444(u16 c, u32 * dst) { + *dst = ((c & 0xf) << 4) | (((c >> 4) & 0xf) << 12) | (((c >> 8) & 0xf) << 20) | ((c >> 12) << 28); + } + static inline void ARGB8From565(u16 c, u32 * dst) { + *dst = ((c & 0x001f) << 19) | (((c >> 5) & 0x003f) << 11) | ((((c >> 10) & 0x001f) << 3)) | 0xFF000000; + } + static inline void ARGB8From5551(u16 c, u32 * dst) { + *dst = ((c & 0x001f) << 19) | (((c >> 5) & 0x001f) << 11) | ((((c >> 10) & 0x001f) << 3)) | 0xFF000000; + } - if(vfb) { - // We'll pseudo-blit framebuffers here to get a resized and flipped version of vfb. - // For now we'll keep these on the same struct as the ones that can get displayed - // (and blatantly copy work already done above while at it). - VirtualFramebufferDX9 *nvfb = 0; + static inline u32 ABGR2RGBA(u32 src) { + return (src >> 8) | (src << 24); + } - // We maintain a separate vector of framebuffer objects for blitting. - for (size_t i = 0; i < bvfbs_.size(); ++i) { - VirtualFramebufferDX9 *v = bvfbs_[i]; - if (MaskedEqual(v->fb_address, vfb->fb_address) && v->format == vfb->format) { - if (v->bufferWidth == vfb->bufferWidth && v->bufferHeight == vfb->bufferHeight) { - nvfb = v; - v->fb_stride = vfb->fb_stride; - v->width = vfb->width; - v->height = vfb->height; + void FramebufferManagerDX9::DrawPixels(const u8 *framebuf, GEBufferFormat pixelFormat, int linesize) { + u8 * convBuf = NULL; + D3DLOCKED_RECT rect; + + drawPixelsTex_->LockRect(0, &rect, NULL, D3DLOCK_NOOVERWRITE); + + convBuf = (u8*)rect.pBits; + + // Final format is ARGB(directx) + + // TODO: We can just change the texture format and flip some bits around instead of this. + if (pixelFormat != GE_FORMAT_8888 || linesize != 512) { + for (int y = 0; y < 272; y++) { + switch (pixelFormat) { + // not tested + case GE_FORMAT_565: + { + const u16 *src = (const u16 *)framebuf + linesize * y; + u32 *dst = (u32*)(convBuf + rect.Pitch * y); + for (int x = 0; x < 480; x++) { + u16_le col0 = src[x+0]; + ARGB8From565(col0, &dst[x + 0]); + } + } + break; + // faster + case GE_FORMAT_5551: + { + const u16 *src = (const u16 *)framebuf + linesize * y; + u32 *dst = (u32*)(convBuf + rect.Pitch * y); + for (int x = 0; x < 480; x++) { + u16_le col0 = src[x+0]; + ARGB8From5551(col0, &dst[x + 0]); + } + } + break; + // not tested + case GE_FORMAT_4444: + { + const u16 *src = (const u16 *)framebuf + linesize * y; + u32 *dst = (u32*)(convBuf + rect.Pitch * y); + for (int x = 0; x < 480; x++) + { + u16_le col = src[x]; + dst[x * 4 + 0] = (col >> 12) << 4; + dst[x * 4 + 1] = ((col >> 8) & 0xf) << 4; + dst[x * 4 + 2] = ((col >> 4) & 0xf) << 4; + dst[x * 4 + 3] = (col & 0xf) << 4; + } + } + break; + + case GE_FORMAT_8888: + { + const u32 *src = (const u32 *)framebuf + linesize * y; + u32 *dst = (u32*)(convBuf + rect.Pitch * y); + for (int x = 0; x < 480; x++) + { + dst[x] = ABGR2RGBA(src[x]); + } + } break; } } + } else { + for (int y = 0; y < 272; y++) { + const u32 *src = (const u32 *)framebuf + linesize * y; + u32 *dst = (u32*)(convBuf + rect.Pitch * y); + for (int x = 0; x < 512; x++) + { + dst[x] = ABGR2RGBA(src[x]); + } + } } - // Create a new fbo if none was found for the size - if(!nvfb) { - nvfb = new VirtualFramebufferDX9(); - nvfb->fbo = 0; - nvfb->fb_address = vfb->fb_address; - nvfb->fb_stride = vfb->fb_stride; - nvfb->z_address = vfb->z_address; - nvfb->z_stride = vfb->z_stride; - nvfb->width = vfb->width; - nvfb->height = vfb->height; - nvfb->renderWidth = vfb->width; - nvfb->renderHeight = vfb->height; - nvfb->bufferWidth = vfb->bufferWidth; - nvfb->bufferHeight = vfb->bufferHeight; - nvfb->format = vfb->format; - nvfb->usageFlags = FB_USAGE_RENDERTARGET; - nvfb->dirtyAfterDisplay = true; + drawPixelsTex_->UnlockRect(0); + // D3DXSaveTextureToFile("game:\\cc.png", D3DXIFF_PNG, drawPixelsTex_, NULL); - if(g_Config.bTrueColor) { - nvfb->colorDepth = FBO_8888; + float x, y, w, h; + CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight); + DrawActiveTexture(drawPixelsTex_, x, y, w, h, false, 480.0f / 512.0f); + } + + // Depth in ogl is between -1;1 we need between 0;1 + static void ConvertMatrices(Matrix4x4 & in) { + /* + in.zz *= 0.5f; + in.wz += 1.f; + */ + Matrix4x4 s; + Matrix4x4 t; + s.setScaling(Vec3(1, 1, 0.5f)); + t.setTranslation(Vec3(0, 0, 0.5f)); + in = in * s; + in = in * t; + } + + void FramebufferManagerDX9::DrawActiveTexture(LPDIRECT3DTEXTURE9 tex, float x, float y, float w, float h, float destW, float destH, bool flip, float uscale, float vscale) { + float u2 = uscale; + // Since we're flipping, 0 is down. That's where the scale goes. + float v1 = flip ? 1.0f : 1.0f - vscale; + float v2 = flip ? 1.0f - vscale : 1.0f; + + float coord[] = { + x, y, 0, 0, v1, + x+w, y, 0, u2, v1, + x+w, y+h, 0, u2, v2, + x, y+h, 0, 0, v2 + }; + + for (int i = 0; i < 4; i++) { + coord[i * 5] = coord[i * 5] / (destW * 0.5) - 1.0f; + coord[i * 5 + 1] = -(coord[i * 5 + 1] / (destH * 0.5) - 1.0f); + } + + //pD3Ddevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_WIREFRAME); + pD3Ddevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE); + pD3Ddevice->SetVertexDeclaration(pFramebufferVertexDecl); + pD3Ddevice->SetPixelShader(pFramebufferPixelShader); + pD3Ddevice->SetVertexShader(pFramebufferVertexShader); + if (tex != NULL) { + pD3Ddevice->SetTexture(0, tex); + } + pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coord, 5 * sizeof(float)); + } + + + VirtualFramebufferDX9 *FramebufferManagerDX9::GetVFBAt(u32 addr) { + VirtualFramebufferDX9 *match = NULL; + for (size_t i = 0; i < vfbs_.size(); ++i) { + VirtualFramebufferDX9 *v = vfbs_[i]; + if (MaskedEqual(v->fb_address, addr) && v->format == displayFormat_ && v->width >= 480) { + // Could check w too but whatever + if (match == NULL || match->last_frame_render < v->last_frame_render) { + match = v; + } + } + } + if (match != NULL) { + return match; + } + + DEBUG_LOG(SCEGE, "Finding no FBO matching address %08x", addr); + return 0; + } + + + + // Heuristics to figure out the size of FBO to create. + static void EstimateDrawingSize(int &drawing_width, int &drawing_height) { + int default_width = 480; + int default_height = 272; + int viewport_width = (int) gstate.getViewportX1(); + int viewport_height = (int) gstate.getViewportY1(); + int region_width = gstate.getRegionX2() + 1; + int region_height = gstate.getRegionY2() + 1; + int scissor_width = gstate.getScissorX2() + 1; + int scissor_height = gstate.getScissorY2() + 1; + int fb_stride = gstate.fbwidth & 0x3C0; + + DEBUG_LOG(SCEGE,"viewport : %ix%i, region : %ix%i , scissor: %ix%i, stride: %i, %i", viewport_width,viewport_height, region_width, region_height, scissor_width, scissor_height, fb_stride, gstate.isModeThrough()); + + // Viewport may return 0x0 for example FF Type-0 and we set it to 480x272 + if (viewport_width <= 1 && viewport_height <=1) { + viewport_width = default_width; + viewport_height = default_height; + } + + if (fb_stride > 0 && fb_stride < 512) { + // Correct scissor size has to be used to render like character shadow in Mortal Kombat . + if (fb_stride == scissor_width && region_width != scissor_width) { + drawing_width = scissor_width; + drawing_height = scissor_height; } else { + drawing_width = viewport_width; + drawing_height = viewport_height; + } + } else { + // Correct region size has to be used when fb_width equals to region_width for exmaple GTA/Midnight Club/MSG Peace Maker . + if (fb_stride == region_width && region_width == viewport_width) { + drawing_width = region_width; + drawing_height = region_height; + } else if (fb_stride == viewport_width) { + drawing_width = viewport_width; + drawing_height = viewport_height; + } else { + drawing_width = default_width; + drawing_height = default_height; + } + } + } + + void FramebufferManagerDX9::DestroyFramebuf(VirtualFramebufferDX9 *v) { + textureCache_->NotifyFramebuffer(v->fb_address, v, NOTIFY_FB_DESTROYED); + if (v->fbo) { + fbo_destroy(v->fbo); + v->fbo = 0; + } + + // Wipe some pointers + if (currentRenderVfb_ == v) + currentRenderVfb_ = 0; + if (displayFramebuf_ == v) + displayFramebuf_ = 0; + if (prevDisplayFramebuf_ == v) + prevDisplayFramebuf_ = 0; + if (prevPrevDisplayFramebuf_ == v) + prevPrevDisplayFramebuf_ = 0; + + delete v; + } + + void FramebufferManagerDX9::SetRenderFrameBuffer() { + if (!gstate_c.framebufChanged && currentRenderVfb_) { + currentRenderVfb_->last_frame_render = gpuStats.numFlips; + currentRenderVfb_->dirtyAfterDisplay = true; + if (!gstate_c.skipDrawReason) + currentRenderVfb_->reallyDirtyAfterDisplay = true; + return; + } +#if 0 + if (g_Config.iRenderingMode != 0 && g_Config.bWipeFramebufferAlpha && currentRenderVfb_) { + // Hack is enabled, and there was a previous framebuffer. + // Before we switch, let's do a series of trickery to copy one bit of stencil to + // destination alpha. Or actually, this is just a bunch of hackery attempts on Wipeout. + // Ignore for now. + /* + glstate.depthTest.disable(); + glstate.colorMask.set(GL_FALSE, GL_FALSE, GL_FALSE, GL_TRUE); + glstate.stencilTest.enable(); + glstate.stencilOp.set(GL_KEEP, GL_KEEP, GL_KEEP); // don't modify stencil? + glstate.stencilFunc.set(GL_GEQUAL, 0xFE, 0xFF); + DrawPlainColor(0x00000000); + //glstate.stencilFunc.set(GL_LESS, 0x80, 0xFF); + //DrawPlainColor(0xFF000000); + glstate.stencilTest.disable(); + glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + */ + + glstate.depthTest.disable(); + glstate.colorMask.set(GL_FALSE, GL_FALSE, GL_FALSE, GL_TRUE); + DrawPlainColor(0x00000000); + shaderManager_->DirtyLastShader(); // dirty lastShader_ + } +#endif + gstate_c.framebufChanged = false; + + // Get parameters + u32 fb_address = gstate.getFrameBufRawAddress(); + int fb_stride = gstate.fbwidth & 0x3C0; + + u32 z_address = gstate.getDepthBufRawAddress(); + int z_stride = gstate.zbwidth & 0x3C0; + + // Yeah this is not completely right. but it'll do for now. + //int drawing_width = ((gstate.region2) & 0x3FF) + 1; + //int drawing_height = ((gstate.region2 >> 10) & 0x3FF) + 1; + + GEBufferFormat fmt = gstate.FrameBufFormat(); + + // As there are no clear "framebuffer width" and "framebuffer height" registers, + // we need to infer the size of the current framebuffer somehow. + int drawing_width, drawing_height; + EstimateDrawingSize(drawing_width, drawing_height); + + int buffer_width = drawing_width; + int buffer_height = drawing_height; + + // Find a matching framebuffer + VirtualFramebufferDX9 *vfb = 0; + for (size_t i = 0; i < vfbs_.size(); ++i) { + VirtualFramebufferDX9 *v = vfbs_[i]; + if (MaskedEqual(v->fb_address, fb_address) && v->width >= drawing_width && v->height >= drawing_height) { + // Let's not be so picky for now. Let's say this is the one. + vfb = v; + // Update fb stride in case it changed + vfb->fb_stride = fb_stride; + v->format = fmt; + break; + } + } + + float renderWidthFactor = (float)PSP_CoreParameter().renderWidth / 480.0f; + float renderHeightFactor = (float)PSP_CoreParameter().renderHeight / 272.0f; + + // None found? Create one. + if (!vfb) { + gstate_c.textureChanged = true; + vfb = new VirtualFramebufferDX9(); + vfb->fbo = 0; + vfb->fb_address = fb_address; + vfb->fb_stride = fb_stride; + vfb->z_address = z_address; + vfb->z_stride = z_stride; + vfb->width = drawing_width; + vfb->height = drawing_height; + vfb->renderWidth = (u16)(drawing_width * renderWidthFactor); + vfb->renderHeight = (u16)(drawing_height * renderHeightFactor); + vfb->bufferWidth = buffer_width; + vfb->bufferHeight = buffer_height; + vfb->format = fmt; + vfb->usageFlags = FB_USAGE_RENDERTARGET; + vfb->dirtyAfterDisplay = true; + if ((gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0) + vfb->reallyDirtyAfterDisplay = true; + vfb->memoryUpdated = false; + + if (g_Config.bTrueColor) { + vfb->colorDepth = FBO_8888; + } else { + switch (fmt) { + case GE_FORMAT_4444: + vfb->colorDepth = FBO_4444; + break; + case GE_FORMAT_5551: + vfb->colorDepth = FBO_5551; + break; + case GE_FORMAT_565: + vfb->colorDepth = FBO_565; + break; + case GE_FORMAT_8888: + vfb->colorDepth = FBO_8888; + break; + default: + vfb->colorDepth = FBO_8888; + break; + } + } + + if (useBufferedRendering_) { + vfb->fbo = fbo_create(vfb->renderWidth, vfb->renderHeight, 1, true, vfb->colorDepth); + if (vfb->fbo) { + fbo_bind_as_render_target(vfb->fbo); + } else { + ERROR_LOG(SCEGE, "Error creating FBO! %i x %i", vfb->renderWidth, vfb->renderHeight); + } + } else { + fbo_unbind(); + // Let's ignore rendering to targets that have not (yet) been displayed. + gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB; + } + + textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_CREATED); + + vfb->last_frame_render = gpuStats.numFlips; + frameLastFramebufUsed = gpuStats.numFlips; + vfbs_.push_back(vfb); + ClearBuffer(); + + currentRenderVfb_ = vfb; + + INFO_LOG(SCEGE, "Creating FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->format); + + // Let's check for depth buffer overlap. Might be interesting. + bool sharingReported = false; + for (size_t i = 0, end = vfbs_.size(); i < end; ++i) { + if (MaskedEqual(fb_address, vfbs_[i]->z_address)) { + WARN_LOG_REPORT(SCEGE, "FBO created from existing depthbuffer (unsupported), %08x/%08x and %08x/%08x", fb_address, z_address, vfbs_[i]->fb_address, vfbs_[i]->z_address); + } else if (MaskedEqual(z_address, vfbs_[i]->fb_address)) { + WARN_LOG_REPORT(SCEGE, "FBO using other buffer as depthbuffer (unsupported), %08x/%08x and %08x/%08x", fb_address, z_address, vfbs_[i]->fb_address, vfbs_[i]->z_address); + } else if (MaskedEqual(z_address, vfbs_[i]->z_address) && fb_address != vfbs_[i]->fb_address && !sharingReported) { + WARN_LOG_REPORT(SCEGE, "FBO sharing existing depthbuffer (unsupported), %08x/%08x and %08x/%08x", fb_address, z_address, vfbs_[i]->fb_address, vfbs_[i]->z_address); + sharingReported = true; + } + } + + // We already have it! + } else if (vfb != currentRenderVfb_) { + bool updateVRAM = !(g_Config.iRenderingMode == FB_NON_BUFFERED_MODE || g_Config.iRenderingMode == FB_BUFFERED_MODE); + + if (updateVRAM && !vfb->memoryUpdated) { + ReadFramebufferToMemory(vfb, true); + } + // Use it as a render target. + DEBUG_LOG(SCEGE, "Switching render target to FBO for %08x: %i x %i x %i ", vfb->fb_address, vfb->width, vfb->height, vfb->format); + vfb->usageFlags |= FB_USAGE_RENDERTARGET; + gstate_c.textureChanged = true; + vfb->last_frame_render = gpuStats.numFlips; + frameLastFramebufUsed = gpuStats.numFlips; + vfb->dirtyAfterDisplay = true; + if ((gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0) + vfb->reallyDirtyAfterDisplay = true; + vfb->memoryUpdated = false; + + if (useBufferedRendering_) { + if (vfb->fbo) { + fbo_bind_as_render_target(vfb->fbo); + } else { + // wtf? This should only happen very briefly when toggling bBufferedRendering + fbo_unbind(); + } + } else { + if (vfb->fbo) { + // wtf? This should only happen very briefly when toggling bBufferedRendering + textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_DESTROYED); + fbo_destroy(vfb->fbo); + vfb->fbo = 0; + } + fbo_unbind(); + + // Let's ignore rendering to targets that have not (yet) been displayed. + if (vfb->usageFlags & FB_USAGE_DISPLAYED_FRAMEBUFFER) { + gstate_c.skipDrawReason &= ~SKIPDRAW_NON_DISPLAYED_FB; + } else { + gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB; + } + } + textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED); + +#if 0 + // Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering + // to it. This broke stuff before, so now it only clears on the first use of an + // FBO in a frame. This means that some games won't be able to avoid the on-some-GPUs + // performance-crushing framebuffer reloads from RAM, but we'll have to live with that. + if (vfb->last_frame_render != gpuStats.numFlips) { + ClearBuffer(); + } +#endif + currentRenderVfb_ = vfb; + } else { + vfb->last_frame_render = gpuStats.numFlips; + frameLastFramebufUsed = gpuStats.numFlips; + vfb->dirtyAfterDisplay = true; + if ((gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0) + vfb->reallyDirtyAfterDisplay = true; + } + + // ugly... + if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { + shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX); + gstate_c.curRTWidth = vfb->width; + gstate_c.curRTHeight = vfb->height; + } + } + + void FramebufferManagerDX9::CopyDisplayToOutput() { + + fbo_unbind(); + dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + currentRenderVfb_ = 0; + + VirtualFramebufferDX9 *vfb = GetVFBAt(displayFramebufPtr_); + if (!vfb) { + if (Memory::IsValidAddress(displayFramebufPtr_)) { + // The game is displaying something directly from RAM. In GTA, it's decoded video. + + // First check that it's not a known RAM copy of a VRAM framebuffer though, as in MotoGP + for (auto iter = knownFramebufferCopies_.begin(); iter != knownFramebufferCopies_.end(); ++iter) { + if (iter->second == displayFramebufPtr_) { + vfb = GetVFBAt(iter->first); + } + } + + if (!vfb) { + // Just a pointer to plain memory to draw. Draw it. + DrawPixels(Memory::GetPointer(displayFramebufPtr_), displayFormat_, displayStride_); + return; + } + } else { + DEBUG_LOG(SCEGE, "Found no FBO to display! displayFBPtr = %08x", displayFramebufPtr_); + // No framebuffer to display! Clear to black. + ClearBuffer(); + return; + } + } + + vfb->usageFlags |= FB_USAGE_DISPLAYED_FRAMEBUFFER; + vfb->dirtyAfterDisplay = false; + vfb->reallyDirtyAfterDisplay = false; + + if (prevDisplayFramebuf_ != displayFramebuf_) { + prevPrevDisplayFramebuf_ = prevDisplayFramebuf_; + } + if (displayFramebuf_ != vfb) { + prevDisplayFramebuf_ = displayFramebuf_; + } + displayFramebuf_ = vfb; + + if (resized_) { + ClearBuffer(); + } + + if (vfb->fbo) { + DEBUG_LOG(SCEGE, "Displaying FBO %08x", vfb->fb_address); + DisableState(); + LPDIRECT3DTEXTURE9 colorTexture = fbo_get_color_texture(vfb->fbo); + + // Output coordinates + float x, y, w, h; + CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight); + + // TODO ES3: Use glInvalidateFramebuffer to discard depth/stencil data at the end of frame. + // and to discard extraFBOs_ after using them. + + if (1) { + dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + // These are in the output display coordinates + DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, true, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height); + } + /* + else if (usePostShader_ && extraFBOs_.size() == 1 && !postShaderAtOutputResolution_) { + // An additional pass, post-processing shader to the extra FBO. + fbo_bind_as_render_target(extraFBOs_[0]); + int fbo_w, fbo_h; + fbo_get_dimensions(extraFBOs_[0], &fbo_w, &fbo_h); + glstate.viewport.set(0, 0, fbo_w, fbo_h); + DrawActiveTexture(colorTexture, 0, 0, fbo_w, fbo_h, fbo_w, fbo_h, true, 1.0f, 1.0f, postShaderProgram_); + + fbo_unbind(); + + // Use the extra FBO, with applied post-processing shader, as a texture. + // fbo_bind_color_as_texture(extraFBOs_[0], 0); + if (extraFBOs_.size() == 0) { + ERROR_LOG(G3D, "WTF?"); + return; + } + colorTexture = fbo_get_color_texture(extraFBOs_[0]); + glstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + // These are in the output display coordinates + DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, true, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height); + } else { + // Use post-shader, but run shader at output resolution. + glstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + // These are in the output display coordinates + DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, true, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height, postShaderProgram_); + } + */ + pD3Ddevice->SetTexture(0, NULL); + } + } + + void FramebufferManagerDX9::ReadFramebufferToMemory(VirtualFramebufferDX9 *vfb, bool sync) { +#if 0 + if (sync) { + PackFramebufferAsync_(NULL); // flush async just in case when we go for synchronous update + } +#endif + + if(vfb) { + // We'll pseudo-blit framebuffers here to get a resized and flipped version of vfb. + // For now we'll keep these on the same struct as the ones that can get displayed + // (and blatantly copy work already done above while at it). + VirtualFramebufferDX9 *nvfb = 0; + + // We maintain a separate vector of framebuffer objects for blitting. + for (size_t i = 0; i < bvfbs_.size(); ++i) { + VirtualFramebufferDX9 *v = bvfbs_[i]; + if (MaskedEqual(v->fb_address, vfb->fb_address) && v->format == vfb->format) { + if (v->bufferWidth == vfb->bufferWidth && v->bufferHeight == vfb->bufferHeight) { + nvfb = v; + v->fb_stride = vfb->fb_stride; + v->width = vfb->width; + v->height = vfb->height; + break; + } + } + } + + // Create a new fbo if none was found for the size + if(!nvfb) { + nvfb = new VirtualFramebufferDX9(); + nvfb->fbo = 0; + nvfb->fb_address = vfb->fb_address; + nvfb->fb_stride = vfb->fb_stride; + nvfb->z_address = vfb->z_address; + nvfb->z_stride = vfb->z_stride; + nvfb->width = vfb->width; + nvfb->height = vfb->height; + nvfb->renderWidth = vfb->width; + nvfb->renderHeight = vfb->height; + nvfb->bufferWidth = vfb->bufferWidth; + nvfb->bufferHeight = vfb->bufferHeight; + nvfb->format = vfb->format; + nvfb->usageFlags = FB_USAGE_RENDERTARGET; + nvfb->dirtyAfterDisplay = true; + + // When updating VRAM, it need to be exact format. switch (vfb->format) { case GE_FORMAT_4444: nvfb->colorDepth = FBO_4444; @@ -688,357 +731,355 @@ void FramebufferManagerDX9::ReadFramebufferToMemory(VirtualFramebufferDX9 *vfb, case GE_FORMAT_5551: nvfb->colorDepth = FBO_5551; break; - case GE_FORMAT_565: + case GE_FORMAT_565: nvfb->colorDepth = FBO_565; break; case GE_FORMAT_8888: - default: + default: nvfb->colorDepth = FBO_8888; break; } - } - nvfb->fbo = fbo_create(nvfb->width, nvfb->height, 1, true, nvfb->colorDepth); - if (!(nvfb->fbo)) { - ERROR_LOG(SCEGE, "Error creating FBO! %i x %i", nvfb->renderWidth, nvfb->renderHeight); - return; - } + nvfb->fbo = fbo_create(nvfb->width, nvfb->height, 1, true, nvfb->colorDepth); + if (!(nvfb->fbo)) { + ERROR_LOG(SCEGE, "Error creating FBO! %i x %i", nvfb->renderWidth, nvfb->renderHeight); + return; + } - nvfb->last_frame_render = gpuStats.numFlips; - bvfbs_.push_back(nvfb); - fbo_bind_as_render_target(nvfb->fbo); - ClearBuffer(); - } else { - nvfb->usageFlags |= FB_USAGE_RENDERTARGET; - nvfb->last_frame_render = gpuStats.numFlips; - nvfb->dirtyAfterDisplay = true; - - fbo_bind_as_render_target(nvfb->fbo); - - // Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering - // to it. This broke stuff before, so now it only clears on the first use of an - // FBO in a frame. This means that some games won't be able to avoid the on-some-GPUs - // performance-crushing framebuffer reloads from RAM, but we'll have to live with that. - if (nvfb->last_frame_render != gpuStats.numFlips) { + nvfb->last_frame_render = gpuStats.numFlips; + bvfbs_.push_back(nvfb); + fbo_bind_as_render_target(nvfb->fbo); ClearBuffer(); + } else { + nvfb->usageFlags |= FB_USAGE_RENDERTARGET; + gstate_c.textureChanged = true; + nvfb->last_frame_render = gpuStats.numFlips; + nvfb->dirtyAfterDisplay = true; + +#if 0 + if (nvfb->fbo) { + fbo_bind_as_render_target(nvfb->fbo); + } + + // Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering + // to it. This broke stuff before, so now it only clears on the first use of an + // FBO in a frame. This means that some games won't be able to avoid the on-some-GPUs + // performance-crushing framebuffer reloads from RAM, but we'll have to live with that. + if (nvfb->last_frame_render != gpuStats.numFlips) { + ClearBuffer(); + } +#endif } -#if 1 - PackFramebufferSync_(nvfb); + + vfb->memoryUpdated = true; + BlitFramebuffer_(vfb, nvfb, false); + +#if 0 +#ifdef USING_GLES2 + PackFramebufferSync_(nvfb); // synchronous glReadPixels +#else + if (gl_extensions.PBO_ARB || !gl_extensions.ATIClampBug) { + if (!sync) { + PackFramebufferAsync_(nvfb); // asynchronous glReadPixels using PBOs + } else { + PackFramebufferSync_(nvfb); // synchronous glReadPixels + } + } +#endif #endif } - - vfb->memoryUpdated = true; - BlitFramebuffer_(vfb, nvfb, false); - } -} - -void FramebufferManagerDX9::BlitFramebuffer_(VirtualFramebufferDX9 *src, VirtualFramebufferDX9 *dst, bool flip, float upscale, float vscale) { - // This only works with buffered rendering - if (!useBufferedRendering_ || !src->fbo) { - return; } - fbo_bind_as_render_target(dst->fbo); - - /* - if(glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - ERROR_LOG(HLE, "Incomplete target framebuffer, aborting blit"); - fbo_unbind(); - return; - } - */ - - dxstate.viewport.set(0, 0, dst->width, dst->height); - DisableState(); - - fbo_bind_color_as_texture(src->fbo, 0); - - float x, y, w, h; - CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight); - - DrawActiveTexture(x, y, w, h, flip, upscale, vscale); - - pD3Ddevice->SetTexture(0, NULL); - -#ifdef _XBOX - fbo_resolve(dst->fbo); -#endif - fbo_unbind(); -} - -// TODO: SSE/NEON -static void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format) { - if(format == GE_FORMAT_8888) { - if(src == dst) { + void FramebufferManagerDX9::BlitFramebuffer_(VirtualFramebufferDX9 *src, VirtualFramebufferDX9 *dst, bool flip, float upscale, float vscale) { + if (dst->fbo) { + fbo_bind_as_render_target(dst->fbo); + } else { + ERROR_LOG_REPORT_ONCE(dstfbozero, SCEGE, "BlitFramebuffer_: dst->fbo == 0"); + fbo_unbind(); return; - } else { // Here lets assume they don't intersect - memcpy(dst, src, stride * height * 4); } - } else { // But here it shouldn't matter if they do - int size = height * stride; - const u32 *src32 = (const u32 *)src; - u16 *dst16 = (u16 *)dst; - switch (format) { - case GE_FORMAT_565: // BGR 565 - for(int i = 0; i < size; i++) { - dst16[i] = RGBA8888toRGB565(src32[i]); - } - break; - case GE_FORMAT_5551: // ABGR 1555 - for(int i = 0; i < size; i++) { - dst16[i] = RGBA8888toRGBA5551(src32[i]); - } - break; - case GE_FORMAT_4444: // ABGR 4444 - for(int i = 0; i < size; i++) { - dst16[i] = RGBA8888toRGBA4444(src32[i]); + /* + if(glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + ERROR_LOG(HLE, "Incomplete target framebuffer, aborting blit"); + fbo_unbind(); + return; + } + */ + + dxstate.viewport.set(0, 0, dst->width, dst->height); + DisableState(); + + if (src->fbo) { + fbo_bind_color_as_texture(src->fbo, 0); + } else { + ERROR_LOG_REPORT_ONCE(srcfbozero, SCEGE, "BlitFramebuffer_: src->fbo == 0"); + fbo_unbind(); + return; + } + + float x, y, w, h; + CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight); + + DrawActiveTexture(0, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, flip, upscale, vscale); + + pD3Ddevice->SetTexture(0, NULL); + + fbo_unbind(); + } + + // TODO: SSE/NEON + // Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :) + void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format) { + if(format == GE_FORMAT_8888) { + if(src == dst) { + return; + } else { // Here lets assume they don't intersect + memcpy(dst, src, stride * height * 4); + } + } else { // But here it shouldn't matter if they do + int size = height * stride; + const u32 *src32 = (const u32 *)src; + u16 *dst16 = (u16 *)dst; + switch (format) { + case GE_FORMAT_565: // BGR 565 + for(int i = 0; i < size; i++) { + dst16[i] = RGBA8888toRGB565(src32[i]); + } + break; + case GE_FORMAT_5551: // ABGR 1555 + for(int i = 0; i < size; i++) { + dst16[i] = RGBA8888toRGBA5551(src32[i]); + } + break; + case GE_FORMAT_4444: // ABGR 4444 + for(int i = 0; i < size; i++) { + dst16[i] = RGBA8888toRGBA4444(src32[i]); + } + break; + case GE_FORMAT_8888: + // Not possible. + break; + default: + break; } - break; - case GE_FORMAT_8888: - // Not possible. - break; - default: - break; } } -} #ifdef _XBOX #include #endif -static void Resolve(u8* data, VirtualFramebufferDX9 *vfb) { + static void Resolve(u8* data, VirtualFramebufferDX9 *vfb) { #ifdef _XBOX - D3DTexture * rtt = (D3DTexture*)fbo_get_rtt(vfb->fbo); - pD3Ddevice->Resolve(D3DRESOLVE_RENDERTARGET0, NULL, rtt, NULL, 0, 0, NULL, 0.f, 0, NULL); + D3DTexture * rtt = (D3DTexture*)fbo_get_rtt(vfb->fbo); + pD3Ddevice->Resolve(D3DRESOLVE_RENDERTARGET0, NULL, rtt, NULL, 0, 0, NULL, 0.f, 0, NULL); - D3DLOCKED_RECT p; - rtt->LockRect(0, &p, NULL, 0); - rtt->UnlockRect(0); + D3DLOCKED_RECT p; + rtt->LockRect(0, &p, NULL, 0); + rtt->UnlockRect(0); - // vfb->fbo->tex is tilled !!!! - XGUntileTextureLevel(vfb->width/2, vfb->height/2, 0, XGGetGpuFormat(D3DFMT_LIN_A8R8G8B8), XGTILE_NONPACKED, data, p.Pitch, NULL, p.pBits, NULL); + // vfb->fbo->tex is tilled !!!! + XGUntileTextureLevel(vfb->width/2, vfb->height/2, 0, XGGetGpuFormat(D3DFMT_LIN_A8R8G8B8), XGTILE_NONPACKED, data, p.Pitch, NULL, p.pBits, NULL); #endif -} - - void FramebufferManagerDX9::PackFramebufferAsync_(VirtualFramebufferDX9 *vfb) { - - return; - - const int MAX_PBO = 2; - bool unbind = false; - bool useCPU = false; - - // Order packing/readback of the framebuffer - if (vfb) { - int pixelType, pixelSize, pixelFormat, align; - - - if (vfb->fbo) { - fbo_bind_for_read(vfb->fbo); - } else { - fbo_unbind(); - return; - } - -#ifdef _XBOX - D3DTexture * rtt = (D3DTexture*)fbo_get_rtt(vfb->fbo); - pD3Ddevice->Resolve(D3DRESOLVE_RENDERTARGET0, NULL, rtt, NULL, 0, 0, NULL, 0.f, 0, NULL); -#endif - - fbo_unbind(); - unbind = true; - } - } - void FramebufferManagerDX9::PackFramebufferSync_(VirtualFramebufferDX9 *vfb) { + void FramebufferManagerDX9::PackFramebufferDirectx9_(VirtualFramebufferDX9 *vfb) { if (vfb->fbo) { fbo_bind_for_read(vfb->fbo); } else { - // ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferSync_: vfb->fbo == 0"); + ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferSync_: vfb->fbo == 0"); fbo_unbind(); return; } - // Pixel size always 4 here because we always request RGBA8888 - size_t bufSize = vfb->fb_stride * vfb->height * 4; - u32 fb_address = (0x04000000) | vfb->fb_address; + // Pixel size always 4 here because we always request RGBA8888 + size_t bufSize = vfb->fb_stride * vfb->height * 4; + u32 fb_address = (0x04000000) | vfb->fb_address; - u8 *packed = 0; - if(vfb->format == GE_FORMAT_8888) { - packed = (u8 *)Memory::GetPointer(fb_address); - } else { // End result may be 16-bit but we are reading 32-bit, so there may not be enough space at fb_address - packed = (u8 *)malloc(bufSize * sizeof(u8)); - } - - if(packed) { - DEBUG_LOG(HLE, "Reading framebuffer to mem, bufSize = %u, packed = %p, fb_address = %08x", - (u32)bufSize, packed, fb_address); - - // Resolve(packed, vfb); - - if(vfb->format != GE_FORMAT_8888) { // If not RGBA 8888 we need to convert - ConvertFromRGBA8888(Memory::GetPointer(fb_address), packed, vfb->fb_stride, vfb->height, vfb->format); - free(packed); + u8 *packed = 0; + if(vfb->format == GE_FORMAT_8888) { + packed = (u8 *)Memory::GetPointer(fb_address); + } else { // End result may be 16-bit but we are reading 32-bit, so there may not be enough space at fb_address + packed = (u8 *)malloc(bufSize * sizeof(u8)); } - } + if(packed) { + DEBUG_LOG(HLE, "Reading framebuffer to mem, bufSize = %u, packed = %p, fb_address = %08x", + (u32)bufSize, packed, fb_address); - fbo_unbind(); -} -void FramebufferManagerDX9::EndFrame() { - if (resized_) { - DestroyAllFBOs(); - dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); - resized_ = false; + // Resolve(packed, vfb); + + if(vfb->format != GE_FORMAT_8888) { // If not RGBA 8888 we need to convert + ConvertFromRGBA8888(Memory::GetPointer(fb_address), packed, vfb->fb_stride, vfb->height, vfb->format); + free(packed); + } + } + + fbo_unbind(); } + void FramebufferManagerDX9::EndFrame() { + if (resized_) { + DestroyAllFBOs(); + dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight); + resized_ = false; + } #if 0 // We flush to memory last requested framebuffer, if any PackFramebufferAsync_(NULL); #endif -} + } -void FramebufferManagerDX9::DeviceLost() { - DestroyAllFBOs(); - resized_ = false; -} + void FramebufferManagerDX9::DeviceLost() { + DestroyAllFBOs(); + resized_ = false; + } -void FramebufferManagerDX9::BeginFrame() { - DecimateFBOs(); - currentRenderVfb_ = 0; - useBufferedRendering_ = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; -} + void FramebufferManagerDX9::BeginFrame() { + DecimateFBOs(); + currentRenderVfb_ = 0; + useBufferedRendering_ = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; + } -void FramebufferManagerDX9::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) { + void FramebufferManagerDX9::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) { - if ((framebuf & 0x04000000) == 0) { - DEBUG_LOG(SCEGE, "Non-VRAM display framebuffer address set: %08x", framebuf); - ramDisplayFramebufPtr_ = framebuf; - displayStride_ = stride; - displayFormat_ = format; - } else { - ramDisplayFramebufPtr_ = 0; displayFramebufPtr_ = framebuf; displayStride_ = stride; displayFormat_ = format; } -} -std::vector FramebufferManagerDX9::GetFramebufferList() { - std::vector list; + std::vector FramebufferManagerDX9::GetFramebufferList() { + std::vector list; - for (size_t i = 0; i < vfbs_.size(); ++i) { - VirtualFramebufferDX9 *vfb = vfbs_[i]; - - FramebufferInfo info; - info.fb_address = vfb->fb_address; - info.z_address = vfb->z_address; - info.format = vfb->format; - info.width = vfb->width; - info.height = vfb->height; - info.fbo = vfb->fbo; - list.push_back(info); - } - - return list; -} - -void FramebufferManagerDX9::DecimateFBOs() { - fbo_unbind(); - currentRenderVfb_ = 0; - // TODO: This doesn't make sense for DirectX? -#ifndef USING_GLES2 - bool useMem = g_Config.iRenderingMode == FB_READFBOMEMORY_GPU || g_Config.iRenderingMode == FB_READFBOMEMORY_CPU; -#else - bool useMem = g_Config.iRenderingMode == FB_READFBOMEMORY_GPU; -#endif - for (size_t i = 0; i < vfbs_.size(); ++i) { - VirtualFramebufferDX9 *vfb = vfbs_[i]; - int age = frameLastFramebufUsed - std::max(vfb->last_frame_render, vfb->last_frame_used); - - if(useMem && age == 0 && !vfb->memoryUpdated) { - ReadFramebufferToMemory(vfb); - } - - if (vfb == displayFramebuf_ || vfb == prevDisplayFramebuf_ || vfb == prevPrevDisplayFramebuf_) { - continue; - } - - if (age > FBO_OLD_AGE) { - INFO_LOG(SCEGE, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age); - DestroyFramebuf(vfb); - vfbs_.erase(vfbs_.begin() + i--); - } - } - - // Do the same for ReadFramebuffersToMemory's VFBs - for (size_t i = 0; i < bvfbs_.size(); ++i) { - VirtualFramebufferDX9 *vfb = bvfbs_[i]; - int age = frameLastFramebufUsed - vfb->last_frame_render; - if (age > FBO_OLD_AGE) { - INFO_LOG(SCEGE, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age); - DestroyFramebuf(vfb); - bvfbs_.erase(bvfbs_.begin() + i--); - } - } -} - -void FramebufferManagerDX9::DestroyAllFBOs() { - fbo_unbind(); - currentRenderVfb_ = 0; - displayFramebuf_ = 0; - prevDisplayFramebuf_ = 0; - prevPrevDisplayFramebuf_ = 0; - - for (size_t i = 0; i < vfbs_.size(); ++i) { - VirtualFramebufferDX9 *vfb = vfbs_[i]; - INFO_LOG(SCEGE, "Destroying FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->format); - DestroyFramebuf(vfb); - } - vfbs_.clear(); -} - -void FramebufferManagerDX9::UpdateFromMemory(u32 addr, int size) { - addr &= ~0x40000000; - // TODO: Could go through all FBOs, but probably not important? - // TODO: Could also check for inner changes, but video is most important. - if (addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr()) { - // TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help. - if (!Memory::IsValidAddress(displayFramebufPtr_)) - return; - - fbo_unbind(); - currentRenderVfb_ = 0; - - bool needUnbind = false; for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebufferDX9 *vfb = vfbs_[i]; - if (MaskedEqual(vfb->fb_address, addr)) { - vfb->dirtyAfterDisplay = true; - vfb->reallyDirtyAfterDisplay = true; - // TODO: This without the fbo_unbind() above would be better than destroying the FBO. - // However, it doesn't seem to work for Star Ocean, at least - if (useBufferedRendering_) { - fbo_bind_as_render_target(vfb->fbo); - needUnbind = true; - DrawPixels(Memory::GetPointer(addr), vfb->format, vfb->fb_stride); - } else { - INFO_LOG(SCEGE, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format); - DestroyFramebuf(vfb); - vfbs_.erase(vfbs_.begin() + i--); - } + + FramebufferInfo info; + info.fb_address = vfb->fb_address; + info.z_address = vfb->z_address; + info.format = vfb->format; + info.width = vfb->width; + info.height = vfb->height; + info.fbo = vfb->fbo; + list.push_back(info); + } + + return list; + } + + // MotoGP workaround + void FramebufferManagerDX9::NotifyFramebufferCopy(u32 src, u32 dest, int size) { + for (size_t i = 0; i < vfbs_.size(); i++) { + // This size fits for MotoGP. Might want to make this more flexible for other games if they do the same. + if ((vfbs_[i]->fb_address | 0x04000000) == src && size == 512 * 272 * 2) { + // A framebuffer matched! + knownFramebufferCopies_.insert(std::pair(src, dest)); + } + } + } + + void FramebufferManagerDX9::DecimateFBOs() { + fbo_unbind(); + currentRenderVfb_ = 0; + bool updateVram = !(g_Config.iRenderingMode == FB_NON_BUFFERED_MODE || g_Config.iRenderingMode == FB_BUFFERED_MODE); + + for (size_t i = 0; i < vfbs_.size(); ++i) { + VirtualFramebufferDX9 *vfb = vfbs_[i]; + int age = frameLastFramebufUsed - std::max(vfb->last_frame_render, vfb->last_frame_used); + + if (updateVram && age == 0 && !vfb->memoryUpdated && vfb == displayFramebuf_) + ReadFramebufferToMemory(vfb); + + if (vfb == displayFramebuf_ || vfb == prevDisplayFramebuf_ || vfb == prevPrevDisplayFramebuf_) { + continue; + } + + if (age > FBO_OLD_AGE) { + INFO_LOG(SCEGE, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age); + DestroyFramebuf(vfb); + vfbs_.erase(vfbs_.begin() + i--); } } - if (needUnbind) - fbo_unbind(); + // Do the same for ReadFramebuffersToMemory's VFBs + for (size_t i = 0; i < bvfbs_.size(); ++i) { + VirtualFramebufferDX9 *vfb = bvfbs_[i]; + int age = frameLastFramebufUsed - vfb->last_frame_render; + if (age > FBO_OLD_AGE) { + INFO_LOG(SCEGE, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age); + DestroyFramebuf(vfb); + bvfbs_.erase(bvfbs_.begin() + i--); + } + } } -} -void FramebufferManagerDX9::Resized() { - resized_ = true; -} + void FramebufferManagerDX9::DestroyAllFBOs() { + fbo_unbind(); + currentRenderVfb_ = 0; + displayFramebuf_ = 0; + prevDisplayFramebuf_ = 0; + prevPrevDisplayFramebuf_ = 0; -} // namespace DX9 \ No newline at end of file + for (size_t i = 0; i < vfbs_.size(); ++i) { + VirtualFramebufferDX9 *vfb = vfbs_[i]; + INFO_LOG(SCEGE, "Destroying FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->format); + DestroyFramebuf(vfb); + } + vfbs_.clear(); + } + + void FramebufferManagerDX9::UpdateFromMemory(u32 addr, int size, bool safe) { + addr &= ~0x40000000; + // TODO: Could go through all FBOs, but probably not important? + // TODO: Could also check for inner changes, but video is most important. + if (addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr() || safe) { + // TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help. + if (!Memory::IsValidAddress(displayFramebufPtr_)) + return; + + fbo_unbind(); + currentRenderVfb_ = 0; + + bool needUnbind = false; + for (size_t i = 0; i < vfbs_.size(); ++i) { + VirtualFramebufferDX9 *vfb = vfbs_[i]; + if (MaskedEqual(vfb->fb_address, addr)) { + vfb->dirtyAfterDisplay = true; + vfb->reallyDirtyAfterDisplay = true; + // TODO: This without the fbo_unbind() above would be better than destroying the FBO. + // However, it doesn't seem to work for Star Ocean, at least + if (useBufferedRendering_ && vfb->fbo) { + fbo_bind_as_render_target(vfb->fbo); + needUnbind = true; + DrawPixels(Memory::GetPointer(addr | 0x04000000), vfb->format, vfb->fb_stride); + } else { + INFO_LOG(SCEGE, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format); + DestroyFramebuf(vfb); + vfbs_.erase(vfbs_.begin() + i--); + } + } + } + + if (needUnbind) + fbo_unbind(); + } + } + + void FramebufferManagerDX9::Resized() { + resized_ = true; + } + + + bool FramebufferManagerDX9::GetCurrentFramebuffer(GPUDebugBuffer &buffer) { + return true; + } + + bool FramebufferManagerDX9::GetCurrentDepthbuffer(GPUDebugBuffer &buffer) { + return false; + } + + bool FramebufferManagerDX9::GetCurrentStencilbuffer(GPUDebugBuffer &buffer) { + return false; + } + +} // namespace DX9 diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index caa7632da0..de91722b95 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -18,6 +18,8 @@ #pragma once #include +#include + #include "d3d9.h" #include "GPU/Directx9/helper/fbo.h" @@ -98,7 +100,8 @@ public: } void DrawPixels(const u8 *framebuf, GEBufferFormat pixelFormat, int linesize); - void DrawActiveTexture(float x, float y, float w, float h, bool flip = false, float uscale = 1.0f, float vscale = 1.0f); + + void DrawActiveTexture(LPDIRECT3DTEXTURE9 tex, float x, float y, float w, float h, float destW, float destH, bool flip = false, float uscale = 1.0f, float vscale = 1.0f); void DestroyAllFBOs(); void DecimateFBOs(); @@ -108,13 +111,16 @@ public: void Resized(); void DeviceLost(); void CopyDisplayToOutput(); - void SetRenderFrameBuffer(); // Uses parameters computed from gstate - void UpdateFromMemory(u32 addr, int size); + void SetRenderFrameBuffer(); // Uses parameters computed from gstate + void UpdateFromMemory(u32 addr, int size, bool safe); void ReadFramebufferToMemory(VirtualFramebufferDX9 *vfb, bool sync = true); // TODO: Break out into some form of FBO manager - VirtualFramebufferDX9 *GetDisplayFBO(); + VirtualFramebufferDX9 *GetVFBAt(u32 addr); + VirtualFramebufferDX9 *GetDisplayVFB() { + return GetVFBAt(displayFramebufPtr_); + } void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format); size_t NumVFBs() const { return vfbs_.size(); } @@ -132,10 +138,19 @@ public: return displayFramebuf_ ? (0x04000000 | displayFramebuf_->fb_address) : 0; } + void NotifyFramebufferCopy(u32 src, u32 dest, int size); + void DestroyFramebuf(VirtualFramebufferDX9 *vfb); + bool GetCurrentFramebuffer(GPUDebugBuffer &buffer); + bool GetCurrentDepthbuffer(GPUDebugBuffer &buffer); + bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer); + private: - u32 ramDisplayFramebufPtr_; // workaround for MotoGP insanity + void CompileDraw2DProgram(); + void DestroyDraw2DProgram(); + + void SetNumExtraFBOs(int num); u32 displayFramebufPtr_; u32 displayStride_; GEBufferFormat displayFormat_; @@ -151,24 +166,35 @@ private: // Used by ReadFramebufferToMemory void BlitFramebuffer_(VirtualFramebufferDX9 *src, VirtualFramebufferDX9 *dst, bool flip = false, float upscale = 1.0f, float vscale = 1.0f); - std::vector bvfbs_; // blitting FBOs - //void PackFramebufferDirectx9_(VirtualFramebufferDX9 *vfb); - void PackFramebufferAsync_(VirtualFramebufferDX9 *vfb); - void PackFramebufferSync_(VirtualFramebufferDX9 *vfb); + void PackFramebufferDirectx9_(VirtualFramebufferDX9 *vfb); // Used by DrawPixels LPDIRECT3DTEXTURE9 drawPixelsTex_; GEBufferFormat drawPixelsTexFormat_; u8 *convBuf; - GLSLProgram *draw2dprogram; + int plainColorLoc_; TextureCacheDX9 *textureCache_; ShaderManagerDX9 *shaderManager_; + bool usePostShader_; + bool postShaderAtOutputResolution_; + + // Used by post-processing shader + std::vector extraFBOs_; bool resized_; bool useBufferedRendering_; + + std::vector bvfbs_; // blitting FBOs + + std::set> knownFramebufferCopies_; + +#if 0 + AsyncPBO *pixelBufObj_; //this isn't that large + u8 currentPBO_; +#endif }; }; diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 8d2118dd06..9890848841 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -475,7 +475,7 @@ bool DIRECTX9_GPU::FramebufferDirty() { // Allow it to process fully before deciding if it's dirty. SyncThread(); } - VirtualFramebufferDX9 *vfb = framebufferManager_.GetDisplayFBO(); + VirtualFramebufferDX9 *vfb = framebufferManager_.GetDisplayVFB(); if (vfb) { bool dirty = vfb->dirtyAfterDisplay; vfb->dirtyAfterDisplay = false; @@ -492,7 +492,7 @@ bool DIRECTX9_GPU::FramebufferReallyDirty() { SyncThread(); } - VirtualFramebufferDX9 *vfb = framebufferManager_.GetDisplayFBO(); + VirtualFramebufferDX9 *vfb = framebufferManager_.GetDisplayVFB(); if (vfb) { bool dirty = vfb->reallyDirtyAfterDisplay; vfb->reallyDirtyAfterDisplay = false; @@ -514,7 +514,7 @@ void DIRECTX9_GPU::CopyDisplayToOutputInternal() { framebufferManager_.CopyDisplayToOutput(); framebufferManager_.EndFrame(); - shaderManager_->EndFrame(); + // shaderManager_->EndFrame(); gstate_c.textureChanged = TEXCHANGE_UPDATED; } @@ -681,9 +681,9 @@ void DIRECTX9_GPU::ExecuteOp(u32 op, u32 diff) { } // TODO: Get rid of this old horror... - int bz_ucount = data & 0xFF; - int bz_vcount = (data >> 8) & 0xFF; - transformDraw_.DrawBezier(bz_ucount, bz_vcount); + // int bz_ucount = data & 0xFF; + // int bz_vcount = (data >> 8) & 0xFF; + // transformDraw_.DrawBezier(bz_ucount, bz_vcount); // And instead use this. // GEPatchPrimType patchPrim = gstate.getPatchPrimitiveType(); @@ -1308,7 +1308,7 @@ void DIRECTX9_GPU::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationTy textureCache_.InvalidateAll(type); if (type != GPU_INVALIDATE_ALL) - framebufferManager_.UpdateFromMemory(addr, size); + framebufferManager_.UpdateFromMemory(addr, size, false); } bool DIRECTX9_GPU::PerformMemoryCopy(u32 dest, u32 src, int size) { @@ -1359,4 +1359,12 @@ void DIRECTX9_GPU::DoState(PointerWrap &p) { shaderManager_->ClearCache(true); } +bool DIRECTX9_GPU::GetCurrentFramebuffer(GPUDebugBuffer &buffer) { return false; } +bool DIRECTX9_GPU::GetCurrentDepthbuffer(GPUDebugBuffer &buffer) { return false; } +bool DIRECTX9_GPU::GetCurrentStencilbuffer(GPUDebugBuffer &buffer) { return false; } + +void DIRECTX9_GPU::ClearShaderCache() { + shaderManager_->ClearCache(true); +} + }; diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h index 33bd0bd19b..6a71974f60 100644 --- a/GPU/Directx9/GPU_DX9.h +++ b/GPU/Directx9/GPU_DX9.h @@ -38,7 +38,8 @@ public: DIRECTX9_GPU(); ~DIRECTX9_GPU(); virtual void InitClear(); - virtual void PreExecuteOp(u32 op, u32 diff); + virtual void PreExecuteOp(u32 op, u32 diff); + void ExecuteOpInternal(u32 op, u32 diff); virtual void ExecuteOp(u32 op, u32 diff); virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format); @@ -59,6 +60,7 @@ public: // Called by the window system if the window size changed. This will be reflected in PSPCoreParam.pixel*. virtual void Resized(); + virtual void ClearShaderCache(); virtual bool DecodeTexture(u8* dest, GPUgstate state) { return textureCache_.DecodeTexture(dest, state); } @@ -71,6 +73,10 @@ public: } std::vector GetFramebufferList(); + bool GetCurrentFramebuffer(GPUDebugBuffer &buffer); + bool GetCurrentDepthbuffer(GPUDebugBuffer &buffer); + bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer); + bool GetCurrentTexture(GPUDebugBuffer &buffer); protected: virtual void FastRunLoop(DisplayList &list); virtual void ProcessEvent(GPUEvent ev); diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index f385dc108f..5226f636f9 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -519,7 +519,7 @@ void ShaderManagerDX9::DirtyShader() { shaderSwitchDirty_ = 0; } -void ShaderManagerDX9::EndFrame() { // disables vertex arrays +void ShaderManagerDX9::DirtyLastShader() { // disables vertex arrays if (lastShader_) lastShader_->stop(); lastShader_ = 0; diff --git a/GPU/Directx9/ShaderManagerDX9.h b/GPU/Directx9/ShaderManagerDX9.h index 08278bdfd5..17c7d45e12 100644 --- a/GPU/Directx9/ShaderManagerDX9.h +++ b/GPU/Directx9/ShaderManagerDX9.h @@ -31,6 +31,19 @@ class PSShader; class VSShader; void ConvertProjMatrixToD3D(Matrix4x4 & in); +// Pre-fetched attrs and uniforms +enum { + ATTR_POSITION = 0, + ATTR_TEXCOORD = 1, + ATTR_NORMAL = 2, + ATTR_W1 = 3, + ATTR_W2 = 4, + ATTR_COLOR0 = 5, + ATTR_COLOR1 = 6, + + ATTR_COUNT, +}; + class LinkedShaderDX9 { protected: @@ -62,6 +75,9 @@ public: u32 dirtyUniforms; + // Present attributes in the shader. + int attrMask; // 1 << ATTR_ ... or-ed together. + // Pre-fetched attrs and uniforms D3DXHANDLE a_position; D3DXHANDLE a_color0; @@ -199,7 +215,7 @@ public: void DirtyUniform(u32 what) { globalDirty_ |= what; } - void EndFrame(); // disables vertex arrays + void DirtyLastShader(); // disables vertex arrays int NumVertexShaders() const { return (int)vsCache_.size(); } int NumFragmentShaders() const { return (int)fsCache_.size(); } diff --git a/GPU/Directx9/SplineDX9.cpp b/GPU/Directx9/SplineDX9.cpp index 97e38a12cc..b7626459a1 100644 --- a/GPU/Directx9/SplineDX9.cpp +++ b/GPU/Directx9/SplineDX9.cpp @@ -21,56 +21,6 @@ namespace DX9 { -// Just to get something on the screen, we'll just not subdivide correctly. -void TransformDrawEngineDX9::DrawBezier(int ucount, int vcount) { - u16 indices[3 * 3 * 6]; - - static bool reported = false; - if (!reported) { - Reporting::ReportMessage("Unsupported bezier curve"); - reported = true; - } - - // if (gstate.patchprimitive) - // Generate indices for a rectangular mesh. - int c = 0; - for (int y = 0; y < 3; y++) { - for (int x = 0; x < 3; x++) { - indices[c++] = y * 3 + x; - indices[c++] = y * 3 + x + 1; - indices[c++] = (y + 1) * 3 + x + 1; - indices[c++] = (y + 1) * 3 + x + 1; - indices[c++] = (y + 1) * 3 + x; - indices[c++] = y * 3 + x; - } - } - - // We are free to use the "decoded" buffer here. - // Let's split it into two to get a second buffer, there's enough space. - u8 *decoded2 = decoded + 65536 * 24; - - // Alright, now for the vertex data. - // For now, we will simply inject UVs. - - float customUV[4 * 4 * 2]; - for (int y = 0; y < 4; y++) { - for (int x = 0; x < 4; x++) { - customUV[(y * 4 + x) * 2 + 0] = (float)x/3.0f; - customUV[(y * 4 + x) * 2 + 1] = (float)y/3.0f; - } - } - - if (!vertTypeGetTexCoordMask(gstate.vertType)) { - VertexDecoderDX9 *dec = GetVertexDecoder(gstate.vertType); - dec->SetVertexType(gstate.vertType); - u32 newVertType = dec->InjectUVs(decoded2, Memory::GetPointer(gstate_c.vertexAddr), customUV, 16); - SubmitPrim(decoded2, &indices[0], GE_PRIM_TRIANGLES, c, newVertType, GE_VTYPE_IDX_16BIT, 0); - } else { - SubmitPrim(Memory::GetPointer(gstate_c.vertexAddr), &indices[0], GE_PRIM_TRIANGLES, c, gstate.vertType, GE_VTYPE_IDX_16BIT, 0); - } - Flush(); // as our vertex storage here is temporary, it will only survive one draw. -} - // Spline implementation copied and modified from neobrain's softgpu (orphis code?) diff --git a/GPU/Directx9/TransformPipelineDX9.cpp b/GPU/Directx9/TransformPipelineDX9.cpp index 9755bcf8b6..69fbb500d4 100644 --- a/GPU/Directx9/TransformPipelineDX9.cpp +++ b/GPU/Directx9/TransformPipelineDX9.cpp @@ -15,6 +15,55 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + + +// Ideas for speeding things up on mobile OpenGL ES implementations +// +// Use superbuffers! Yes I just invented that name. +// +// The idea is to avoid respecifying the vertex format between every draw call (multiple glVertexAttribPointer ...) +// by combining the contents of multiple draw calls into one buffer, as long as +// they have exactly the same output vertex format. (different input formats is fine! This way +// we can combine the data for multiple draws with different numbers of bones, as we consider numbones < 4 to be = 4) +// into one VBO. +// +// This will likely be a win because I believe that between every change of VBO + glVertexAttribPointer*N, the driver will +// perform a lot of validation, probably at draw call time, while all the validation can be skipped if the only thing +// that changes between two draw calls is simple state or texture or a matrix etc, not anything vertex related. +// Also the driver will have to manage hundreds instead of thousands of VBOs in games like GTA. +// +// * Every 10 frames or something, do the following: +// - Frame 1: +// + Mark all drawn buffers with in-frame sequence numbers (alternatively, +// just log them in an array) +// - Frame 2 (beginning?): +// + Take adjacent buffers that have the same output vertex format, and add them +// to a list of buffers to combine. Create said buffers with appropriate sizes +// and precompute the offsets that the draws should be written into. +// - Frame 2 (end): +// + Actually do the work of combining the buffers. This probably means re-decoding +// the vertices into a new one. Will also have to apply index offsets. +// +// Also need to change the drawing code so that we don't glBindBuffer and respecify glVAP if +// two subsequent drawcalls come from the same superbuffer. +// +// Or we ignore all of this including vertex caching and simply find a way to do highly optimized vertex streaming, +// like Dolphin is trying to. That will likely never be able to reach the same speed as perfectly optimized +// superbuffers though. For this we will have to JIT the vertex decoder but that's not too hard. +// +// Now, when do we delete superbuffers? Maybe when half the buffers within have been killed? +// +// Another idea for GTA which switches textures a lot while not changing much other state is to use ES 3 Array +// textures, if they are the same size (even if they aren't, might be okay to simply resize the textures to match +// if they're just a multiple of 2 away) or something. Then we'd have to add a W texture coordinate to choose the +// texture within the bound texture array to the vertex data when merging into superbuffers. +// +// There are even more things to try. For games that do matrix palette skinning by quickly switching bones and +// just drawing a few triangles per call (NBA, FF:CC, Tekken 6 etc) we could even collect matrices, upload them +// all at once, writing matrix indices into the vertices in addition to the weights, and then doing a single +// draw call with specially generated shader to draw the whole mesh. This code will be seriously complex though. + +#include "base/logging.h" #include "base/timeutil.h" #include "Common/MemoryUtil.h" @@ -49,7 +98,11 @@ const D3DPRIMITIVETYPE glprim[8] = { D3DPT_TRIANGLELIST, D3DPT_TRIANGLESTRIP, D3DPT_TRIANGLEFAN, +#ifndef _XBOX D3DPT_TRIANGLELIST, // With OpenGL ES we have to expand sprites into triangles, tripling the data instead of doubling. sigh. OpenGL ES, Y U NO SUPPORT GL_QUADS? +#else + D3DPT_TRIANGLELIST, +#endif }; #ifndef _XBOX @@ -76,12 +129,21 @@ enum { TRANSFORMED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * sizeof(TransformedVertex) }; +#define QUAD_INDICES_MAX 32768 #define VERTEXCACHE_DECIMATION_INTERVAL 17 +#ifndef _XBOX +// Check for max first as clamping to max is more common than min when lighting. inline float clamp(float in, float min, float max) { - return in < min ? min : (in > max ? max : in); + return in > max ? max : (in < min ? min : in); } +#else +inline float clamp(float in, float min, float max) { + in = __fsel( in - min , in, min ); + return __fsel( in - max, max, in ); +} +#endif TransformDrawEngineDX9::TransformDrawEngineDX9() : collectedVerts(0), @@ -94,14 +156,24 @@ TransformDrawEngineDX9::TransformDrawEngineDX9() numDrawCalls(0), vertexCountInDrawCalls(0), uvScale(0) { - decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL; - // Allocate nicely aligned memory. Maybe graphics drivers will - // appreciate it. - // All this is a LOT of memory, need to see if we can cut down somehow. - decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE); - decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE); - transformed = (TransformedVertex *)AllocateMemoryPages(TRANSFORMED_VERTEX_BUFFER_SIZE); - transformedExpanded = (TransformedVertex *)AllocateMemoryPages(3 * TRANSFORMED_VERTEX_BUFFER_SIZE); + decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL; + // Allocate nicely aligned memory. Maybe graphics drivers will + // appreciate it. + // All this is a LOT of memory, need to see if we can cut down somehow. + decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE); + decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE); + transformed = (TransformedVertex *)AllocateMemoryPages(TRANSFORMED_VERTEX_BUFFER_SIZE); + transformedExpanded = (TransformedVertex *)AllocateMemoryPages(3 * TRANSFORMED_VERTEX_BUFFER_SIZE); + quadIndices_ = new u16[6 * QUAD_INDICES_MAX]; + + for (int i = 0; i < QUAD_INDICES_MAX; i++) { + quadIndices_[i * 6 + 0] = i * 4; + quadIndices_[i * 6 + 1] = i * 4 + 2; + quadIndices_[i * 6 + 2] = i * 4 + 1; + quadIndices_[i * 6 + 3] = i * 4 + 1; + quadIndices_[i * 6 + 4] = i * 4 + 2; + quadIndices_[i * 6 + 5] = i * 4 + 3; + } if (g_Config.bPrescaleUV) { uvScale = new UVScale[MAX_DEFERRED_DRAW_CALLS]; @@ -117,6 +189,8 @@ TransformDrawEngineDX9::~TransformDrawEngineDX9() { FreeMemoryPages(transformed, TRANSFORMED_VERTEX_BUFFER_SIZE); FreeMemoryPages(transformedExpanded, 3 * TRANSFORMED_VERTEX_BUFFER_SIZE); + delete [] quadIndices_; + for (auto iter = decoderMap_.begin(); iter != decoderMap_.end(); iter++) { delete iter->second; } @@ -413,7 +487,7 @@ void TransformDrawEngineDX9::SoftwareTransformAndDraw( float v[3] = {0, 0, 0}; float c0[4] = {1, 1, 1, 1}; float c1[4] = {0, 0, 0, 0}; - float uv[3] = {0, 0, 0}; + float uv[3] = {0, 0, 1}; float fogCoef = 1.0f; if (throughmode) { @@ -448,7 +522,7 @@ void TransformDrawEngineDX9::SoftwareTransformAndDraw( if (reader.hasNormal()) reader.ReadNrm(nrm); - if ((vertType & GE_VTYPE_WEIGHT_MASK) == GE_VTYPE_WEIGHT_NONE) { + if (!vertTypeIsSkinningEnabled(vertType)) { Vec3ByMatrix43(out, pos, gstate.worldMatrix); if (reader.hasNormal()) { Norm3ByMatrix43(norm, nrm, gstate.worldMatrix); @@ -460,9 +534,7 @@ void TransformDrawEngineDX9::SoftwareTransformAndDraw( // Skinning Vec3f psum(0,0,0); Vec3f nsum(0,0,0); - int nweights = ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT) + 1; - for (int i = 0; i < nweights; i++) - { + for (int i = 0; i < vertTypeGetNumBoneWeights(vertType); i++) { if (weights[i] != 0.0f) { Vec3ByMatrix43(out, pos, gstate.boneMatrix+i*12); Vec3f tpos(out); @@ -602,6 +674,7 @@ void TransformDrawEngineDX9::SoftwareTransformAndDraw( ERROR_LOG_REPORT(G3D, "Impossible UV gen mode? %d", gstate.getUVGenMode()); break; } + uv[0] = uv[0] * widthFactor; uv[1] = uv[1] * heightFactor; @@ -640,11 +713,14 @@ void TransformDrawEngineDX9::SoftwareTransformAndDraw( drawBuffer = transformedExpanded; TransformedVertex *trans = &transformedExpanded[0]; TransformedVertex saved; + u32 stencilValue; for (int i = 0; i < vertexCount; i += 2) { int index = ((const u16*)inds)[i]; saved = transformed[index]; int index2 = ((const u16*)inds)[i + 1]; TransformedVertex &transVtx = transformed[index2]; + if (i == 0) + stencilValue = transVtx.color0[3]; // We have to turn the rectangle into two triangles, so 6 points. Sigh. // bottom right @@ -674,6 +750,7 @@ void TransformDrawEngineDX9::SoftwareTransformAndDraw( // Apparently, non-through RotateUV just breaks things. // If we find a game where it helps, we'll just have to figure out how they differ. // Possibly, it has something to do with flipped viewport Y axis, which a few games use. + // One game might be one of the Metal Gear ones, can't find the issue right now though. // else // RotateUV(trans); @@ -686,6 +763,12 @@ void TransformDrawEngineDX9::SoftwareTransformAndDraw( numTrans += 6; } + + // We don't know the color until here, so we have to do it now, instead of in StateMapping. + // Might want to reconsider the order of things later... + if (gstate.isModeClear() && gstate.isClearModeAlphaMask()) { + dxstate.stencilFunc.set(D3DCMP_ALWAYS, stencilValue, 255); + } } @@ -806,6 +889,9 @@ void TransformDrawEngineDX9::SubmitPrim(void *verts, void *inds, GEPrimitiveType } void TransformDrawEngineDX9::DecodeVerts() { + UVScale origUV; + if (uvScale) + origUV = gstate_c.uv; for (int i = 0; i < numDrawCalls; i++) { const DeferredDrawCall &dc = drawCalls[i]; @@ -835,7 +921,7 @@ void TransformDrawEngineDX9::DecodeVerts() { while (j < numDrawCalls) { if (drawCalls[j].verts != dc.verts) break; - if (uvScale && memcmp(&uvScale[j], &uvScale[i], sizeof(uvScale[0]) != 0)) + if (uvScale && memcmp(&uvScale[j], &uvScale[i], sizeof(uvScale[0])) != 0) break; indexLowerBound = std::min(indexLowerBound, (int)drawCalls[j].indexLowerBound); @@ -876,6 +962,8 @@ void TransformDrawEngineDX9::DecodeVerts() { // Force to points (0) indexGen.AddPrim(GE_PRIM_POINTS, 0); } + if (uvScale) + gstate_c.uv = origUV; } u32 TransformDrawEngineDX9::ComputeHash() { @@ -993,6 +1081,7 @@ void TransformDrawEngineDX9::DoFlush() { LPDIRECT3DINDEXBUFFER9 ib_ = NULL; int vertexCount = 0; + int maxIndex = 0; bool useElements = true; // Cannot cache vertex data with morph enabled. @@ -1019,6 +1108,7 @@ void TransformDrawEngineDX9::DoFlush() { DecodeVerts(); // writes to indexGen vai->numVerts = indexGen.VertexCount(); vai->prim = indexGen.Prim(); + vai->maxIndex = indexGen.MaxIndex(); goto rotateVBO; } @@ -1065,6 +1155,7 @@ void TransformDrawEngineDX9::DoFlush() { DecodeVerts(); vai->numVerts = indexGen.VertexCount(); vai->prim = indexGen.Prim(); + vai->maxIndex = indexGen.MaxIndex(); useElements = !indexGen.SeenOnlyPurePrims(); if (!useElements && indexGen.PureCount()) { vai->numVerts = indexGen.PureCount(); @@ -1097,7 +1188,8 @@ void TransformDrawEngineDX9::DoFlush() { vb_ = vai->vbo; ib_ = vai->ebo; vertexCount = vai->numVerts; - prim = static_cast(vai->prim); + maxIndex = vai->maxIndex; + prim = static_cast(vai->prim); break; } @@ -1114,7 +1206,9 @@ void TransformDrawEngineDX9::DoFlush() { ib_ = vai->ebo; vertexCount = vai->numVerts; - prim = static_cast(vai->prim); + + maxIndex = vai->maxIndex; + prim = static_cast(vai->prim); break; } @@ -1135,7 +1229,8 @@ void TransformDrawEngineDX9::DoFlush() { rotateVBO: gpuStats.numUncachedVertsDrawn += indexGen.VertexCount(); useElements = !indexGen.SeenOnlyPurePrims(); - vertexCount = indexGen.VertexCount(); + vertexCount = indexGen.VertexCount(); + maxIndex = indexGen.MaxIndex(); if (!useElements && indexGen.PureCount()) { vertexCount = indexGen.PureCount(); } @@ -1191,5 +1286,51 @@ rotateVBO: host->GPUNotifyDraw(); #endif } +bool TransformDrawEngineDX9::TestBoundingBox(void* control_points, int vertexCount, u32 vertType) { + // Simplify away bones and morph before proceeding -} \ No newline at end of file + /* + SimpleVertex *corners = (SimpleVertex *)(decoded + 65536 * 12); + u8 *temp_buffer = decoded + 65536 * 24; + + u32 origVertType = vertType; + vertType = NormalizeVertices((u8 *)corners, temp_buffer, (u8 *)control_points, 0, vertexCount, vertType); + + for (int cube = 0; cube < vertexCount / 8; cube++) { + // For each cube... + + for (int i = 0; i < 8; i++) { + const SimpleVertex &vert = corners[cube * 8 + i]; + + // To world space... + float worldPos[3]; + Vec3ByMatrix43(worldPos, (float *)&vert.pos.x, gstate.worldMatrix); + + // To view space... + float viewPos[3]; + Vec3ByMatrix43(viewPos, worldPos, gstate.viewMatrix); + + // And finally to screen space. + float frustumPos[4]; + Vec3ByMatrix44(frustumPos, viewPos, gstate.projMatrix); + + // Project to 2D + float x = frustumPos[0] / frustumPos[3]; + float y = frustumPos[1] / frustumPos[3]; + + // Rescale 2d position + // ... + } + } + */ + + + // Let's think. A better approach might be to take the edges of the drawing region and the projection + // matrix to build a frustum pyramid, and then clip the cube against those planes. If all vertices fail the same test, + // the cube is out. Otherwise it's in. + // TODO.... + + return true; +} + +} // namespace diff --git a/GPU/Directx9/TransformPipelineDX9.h b/GPU/Directx9/TransformPipelineDX9.h index ad42803697..df3d70b886 100644 --- a/GPU/Directx9/TransformPipelineDX9.h +++ b/GPU/Directx9/TransformPipelineDX9.h @@ -80,8 +80,9 @@ public: LPDIRECT3DINDEXBUFFER9 ebo; - // Precalculated parameter for drawdrawElements + // Precalculated parameter for drawRangeElements u16 numVerts; + u16 maxIndex; s8 prim; // ID information @@ -98,12 +99,11 @@ class TransformDrawEngineDX9 { public: TransformDrawEngineDX9(); virtual ~TransformDrawEngineDX9(); - void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertexType, int forceIndexType, int *bytesRead); - void SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertex_type); - void SubmitBezier(void* control_points, void* indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertex_type); - - // legacy - void DrawBezier(int ucount, int vcount); + + void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int forceIndexType, int *bytesRead); + void SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertType); + void SubmitBezier(void* control_points, void* indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertType); + bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType); void DecodeVerts(); void SetShaderManager(ShaderManagerDX9 *shaderManager) { @@ -140,6 +140,9 @@ private: void ApplyDrawState(int prim); bool IsReallyAClear(int numVerts) const; + // Preprocessing for spline/bezier + u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType); + // drawcall ID u32 ComputeFastDCID(); u32 ComputeHash(); // Reads deferred vertex data. @@ -177,6 +180,9 @@ private: TransformedVertex *transformedExpanded; std::map vai_; + + // Fixed index buffer for easy quad generation from spline/bezier + u16 *quadIndices_; // Other ShaderManagerDX9 *shaderManager_; diff --git a/GPU/Directx9/helper/fbo.cpp b/GPU/Directx9/helper/fbo.cpp index 3d700971f2..72889d1bc5 100644 --- a/GPU/Directx9/helper/fbo.cpp +++ b/GPU/Directx9/helper/fbo.cpp @@ -89,6 +89,11 @@ void fbo_bind_as_render_target(FBO *fbo) { pD3Ddevice->SetDepthStencilSurface(fbo->depthstencil); } + +LPDIRECT3DTEXTURE9 fbo_get_color_texture(FBO *fbo) { + return fbo->tex; +} + void fbo_bind_for_read(FBO *fbo) { // pD3Ddevice->SetRenderTarget(0, fbo->surf); } diff --git a/GPU/Directx9/helper/fbo.h b/GPU/Directx9/helper/fbo.h index a7fa4ff1a7..4228bc2c5e 100644 --- a/GPU/Directx9/helper/fbo.h +++ b/GPU/Directx9/helper/fbo.h @@ -35,6 +35,8 @@ void fbo_destroy(FBO *fbo); void fbo_get_dimensions(FBO *fbo, int *w, int *h); void fbo_resolve(FBO *fbo); +LPDIRECT3DTEXTURE9 fbo_get_color_texture(FBO *fbo); + void * fbo_get_rtt(FBO *fbo); // To get default depth and rt surface diff --git a/GPU/Directx9/helper/global.cpp b/GPU/Directx9/helper/global.cpp index 893b96b8bd..1b5f796b64 100644 --- a/GPU/Directx9/helper/global.cpp +++ b/GPU/Directx9/helper/global.cpp @@ -31,7 +31,7 @@ static const char * vscode = " VS_OUT main( VS_IN In ) " " { " " VS_OUT Out; " - " Out.ProjPos = mul( matWVP, In.ObjPos ); " // Transform vertex into + " Out.ProjPos = In.ObjPos; " // Transform vertex into " Out.Uv = In.Uv; " " return Out; " // Transfer color " } ";