Changing where depth is read. Trying to use the same depth buffer GL uses when copying depth to a texture. This eliminates some quirky code and gets depth copies working in AA, but may not work on older graphics cards.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3234 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
donkopunchstania 2009-05-15 02:39:55 +00:00
parent 034e3c72a2
commit 14a67bc8bc
17 changed files with 321 additions and 553 deletions

View File

@ -26,7 +26,7 @@
// Mash together all the inputs that contribute to the code of a generated pixel shader into
// a unique identifier, basically containing all the bits. Yup, it's a lot ....
void GetPixelShaderId(PIXELSHADERUID &uid, u32 s_texturemask, u32 zbufrender, u32 zBufRenderToCol0, u32 dstAlphaEnable)
void GetPixelShaderId(PIXELSHADERUID &uid, u32 s_texturemask, u32 dstAlphaEnable)
{
u32 projtexcoords = 0;
for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; i++) {
@ -42,9 +42,7 @@ void GetPixelShaderId(PIXELSHADERUID &uid, u32 s_texturemask, u32 zbufrender, u3
((u32)dstAlphaEnable << 11) |
((u32)((bpmem.alphaFunc.hex >> 16) & 0xff) << 12) |
(projtexcoords << 20) |
((u32)bpmem.ztex2.op << 28) |
(zbufrender << 30) |
(zBufRenderToCol0 << 31);
((u32)bpmem.ztex2.op << 28);
uid.values[0] = (uid.values[0] & ~0x0ff00000) | (projtexcoords << 20);
// swap table
@ -134,7 +132,7 @@ static void WriteStage(char *&p, int n, u32 texture_mask);
static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, u32 texture_mask);
static void WriteAlphaCompare(char *&p, int num, int comp);
static bool WriteAlphaTest(char *&p, bool HLSL);
static void WriteFog(char *&p, bool bOutputZ);
static void WriteFog(char *&p);
const float epsilon8bit = 1.0f / 255.0f;
@ -369,7 +367,7 @@ static void BuildSwapModeTable()
}
}
const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool bRenderZToCol0, bool dstAlphaEnable, bool HLSL)
const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, bool HLSL)
{
text[sizeof(text) - 1] = 0x7C; // canary
DVSTARTPROFILE();
@ -383,13 +381,6 @@ const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool
WRITE(p, "//%i TEV stages, %i texgens, %i IND stages\n",
numStages, numTexgen, bpmem.genMode.numindstages);
bool bRenderZ = has_zbuffer_target && bpmem.zmode.updateenable;
bool bOutputZ = bpmem.ztex2.op != ZTEXTURE_DISABLE;
bool bInputZ = bpmem.ztex2.op==ZTEXTURE_ADD || bRenderZ || bpmem.fog.c_proj_fsel.fsel != 0;
// bool bRenderZToCol0 = ; // output z and alpha to color0
assert( !bRenderZToCol0 || bRenderZ );
int nIndirectStagesUsed = 0;
if (bpmem.genMode.numindstages > 0) {
for (int i = 0; i < numStages; ++i) {
@ -437,12 +428,8 @@ const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool
WRITE(p, "void main(\n");
WRITE(p, "out half4 ocol0 : COLOR0,\n");
if (bRenderZ && !bRenderZToCol0 )
WRITE(p, "out half4 ocol1 : COLOR1,\n");
if (bOutputZ )
WRITE(p, " out float depth : DEPTH,\n");
WRITE(p, " out half4 ocol0 : COLOR0,\n");
WRITE(p, " out float depth : DEPTH,\n");
// compute window position if needed because binding semantic WPOS is not widely supported
if (numTexgen < 7) {
@ -504,20 +491,18 @@ const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool
WRITE(p, "float4 clipPos = float4(uv0.w, uv1.w, uv2.w, uv3.w);\n");
}
if (bInputZ) {
// the screen space depth value = far z + (clip z / clip w) * z range
WRITE(p, "float zCoord = "I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * "I_ZBIAS"[1].y;\n");
}
// the screen space depth value = far z + (clip z / clip w) * z range
WRITE(p, "float zCoord = "I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * "I_ZBIAS"[1].y;\n");
if (bOutputZ) {
// use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
if (bpmem.ztex2.op == ZTEXTURE_ADD) {
WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w + zCoord);\n");
}
else {
_assert_(bpmem.ztex2.op == ZTEXTURE_REPLACE);
WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w);\n");
}
// use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
if (bpmem.ztex2.op == ZTEXTURE_ADD) {
WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w + zCoord);\n");
}
else if (bpmem.ztex2.op == ZTEXTURE_REPLACE) {
WRITE(p, "depth = frac(dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w);\n");
}
else {
WRITE(p, "depth = zCoord;\n");
}
//if (bpmem.genMode.numindstages ) WRITE(p, "prev.rg = indtex0.xy;\nprev.b = 0;\n");
@ -529,34 +514,14 @@ const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool
WRITE(p, "ocol0 = 0;\n");
}
else {
if (!bRenderZToCol0) {
if (dstAlphaEnable) {
WRITE(p, " ocol0 = float4(prev.rgb,"I_ALPHA"[0].w);\n");
} else {
WriteFog(p, bOutputZ);
WRITE(p, " ocol0 = prev;\n");
}
} else {
WRITE(p, " ocol0 = prev;\n");
}
}
if (bRenderZ) {
// write depth as color
if (bRenderZToCol0) {
if (bOutputZ )
WRITE(p, "ocol0.xyz = frac(float3(256.0f*256.0f, 256.0f, 1.0f) * depth);\n");
else
WRITE(p, "ocol0.xyz = frac(float3(256.0f*256.0f, 256.0f, 1.0f) * zCoord);\n");
WRITE(p, "ocol0.w = prev.w;\n");
}
else {
if (bOutputZ)
WRITE(p, "ocol1 = frac(float4(256.0f*256.0f, 256.0f, 1.0f, 0.0f) * depth);\n");
else
WRITE(p, "ocol1 = frac(float4(256.0f*256.0f, 256.0f, 1.0f, 0.0f) * zCoord);\n");
if (dstAlphaEnable) {
WRITE(p, " ocol0 = float4(prev.rgb,"I_ALPHA"[0].w);\n");
} else {
WriteFog(p);
WRITE(p, " ocol0 = prev;\n");
}
}
WRITE(p, "}\n");
if (text[sizeof(text) - 1] != 0x7C)
PanicAlert("PixelShader generator - buffer too small, canary has been eaten!");
@ -888,7 +853,7 @@ static bool WriteAlphaTest(char *&p, bool HLSL)
return true;
}
static void WriteFog(char *&p, bool bOutputZ)
static void WriteFog(char *&p)
{
bool enabled = bpmem.fog.c_proj_fsel.fsel == 0 ? false : true;
@ -896,11 +861,11 @@ static void WriteFog(char *&p, bool bOutputZ)
if (bpmem.fog.c_proj_fsel.proj == 0) {
// perspective
// ze = A/(B - Zs)
WRITE (p, " float ze = "I_FOG"[1].x / ("I_FOG"[1].y - %s);\n", bOutputZ ? "depth" : "zCoord");
WRITE (p, " float ze = "I_FOG"[1].x / ("I_FOG"[1].y - depth);\n");
} else {
// orthographic
// ze = a*Zs
WRITE (p, " float ze = "I_FOG"[1].x * %s;\n", bOutputZ ? "depth" : "zCoord");
WRITE (p, " float ze = "I_FOG"[1].x * depth;\n");
}
WRITE (p, " float fog = clamp(ze - "I_FOG"[1].z, 0.0f, 1.0f);\n");

View File

@ -92,7 +92,7 @@ public:
}
};
const char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool bRenderZToCol0, bool dstAlphaEnable, bool HLSL = false);
void GetPixelShaderId(PIXELSHADERUID &, u32 s_texturemask, u32 zbufrender, u32 zBufRenderToCol0, u32 dstAlphaEnable);
const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, bool HLSL = false);
void GetPixelShaderId(PIXELSHADERUID &, u32 s_texturemask, u32 dstAlphaEnable);
#endif

View File

@ -25,13 +25,12 @@
// Mash together all the inputs that contribute to the code of a generated vertex shader into
// a unique identifier, basically containing all the bits. Yup, it's a lot ....
void GetVertexShaderId(VERTEXSHADERUID& vid, u32 components, u32 zbufrender)
void GetVertexShaderId(VERTEXSHADERUID& vid, u32 components)
{
vid.values[0] = components |
(xfregs.numTexGens << 23) |
(xfregs.nNumChans << 27) |
((u32)xfregs.bEnableDualTexTransform << 29) |
(zbufrender << 30);
((u32)xfregs.bEnableDualTexTransform << 29);
for (int i = 0; i < 2; ++i) {
vid.values[1+i] = xfregs.colChans[i].color.enablelighting ?
@ -77,7 +76,7 @@ static char text[16384];
char *GenerateLightShader(char* p, int index, const LitChannel& chan, const char* dest, int coloralpha);
const char *GenerateVertexShader(u32 components, bool has_zbuffer_target)
const char *GenerateVertexShader(u32 components)
{
text[sizeof(text) - 1] = 0x7C; // canary
DVSTARTPROFILE();

View File

@ -94,7 +94,7 @@ public:
}
};
const char *GenerateVertexShader(u32 components, bool has_zbuffer_target);
void GetVertexShaderId(VERTEXSHADERUID& vid, u32 components, u32 zbufrender);
const char *GenerateVertexShader(u32 components);
void GetVertexShaderId(VERTEXSHADERUID& vid, u32 components);
#endif

View File

@ -67,7 +67,7 @@ void PixelShaderCache::SetShader()
DVSTARTPROFILE();
PIXELSHADERUID uid;
GetPixelShaderId(uid, PixelShaderManager::GetTextureMask(), false, false, false);
GetPixelShaderId(uid, PixelShaderManager::GetTextureMask(), false);
PSCache::iterator iter;
iter = PixelShaders.find(uid);
@ -85,7 +85,7 @@ void PixelShaderCache::SetShader()
}
bool HLSL = false;
const char *code = GeneratePixelShader(PixelShaderManager::GetTextureMask(), false, false, false, HLSL);
const char *code = GeneratePixelShader(PixelShaderManager::GetTextureMask(), false, HLSL);
LPDIRECT3DPIXELSHADER9 shader = HLSL ? D3D::CompilePixelShader(code, (int)strlen(code), false) : CompileCgShader(code);
if (shader)
{

View File

@ -69,7 +69,7 @@ void VertexShaderCache::SetShader(u32 components)
DVSTARTPROFILE();
VERTEXSHADERUID uid;
GetVertexShaderId(uid, components, false);
GetVertexShaderId(uid, components);
VSCache::iterator iter;
iter = vshaders.find(uid);
@ -86,7 +86,7 @@ void VertexShaderCache::SetShader(u32 components)
}
bool HLSL = false;
const char *code = GenerateVertexShader(components, false);
const char *code = GenerateVertexShader(components);
LPDIRECT3DVERTEXSHADER9 shader = HLSL ? D3D::CompileVertexShader(code, (int)strlen(code), false) : CompileCgShader(code);
if (shader)
{

View File

@ -88,9 +88,6 @@ void SetDepthMode(const Bypass &bp)
glDisable(GL_DEPTH_TEST);
glDepthMask(GL_FALSE);
}
if (!bpmem.zmode.updateenable)
Renderer::SetRenderMode(Renderer::RM_Normal);
}
void SetBlendMode(const Bypass &bp)
{
@ -160,68 +157,38 @@ void RenderToXFB(const Bypass &bp, const TRectangle &multirc, const float &yScal
}
void ClearScreen(const Bypass &bp, const TRectangle &multirc)
{
// Clear color
Renderer::SetRenderMode(Renderer::RM_Normal);
// Clear Z-Buffer target
bool bRestoreZBufferTarget = Renderer::UseFakeZTarget();
// Update the view port for clearing the picture
glViewport(0, 0, Renderer::GetTargetWidth(), Renderer::GetTargetHeight());
// Update the view port for clearing the picture
glViewport(0, 0, Renderer::GetTargetWidth(), Renderer::GetTargetHeight());
// Always set the scissor in case it was set by the game and has not been reset
glScissor(multirc.left, (Renderer::GetTargetHeight() - multirc.bottom),
(multirc.right - multirc.left), (multirc.bottom - multirc.top));
// ---------------------------
// Always set the scissor in case it was set by the game and has not been reset
glScissor(multirc.left, (Renderer::GetTargetHeight() - multirc.bottom),
(multirc.right - multirc.left), (multirc.bottom - multirc.top));
// ---------------------------
VertexShaderManager::SetViewportChanged();
VertexShaderManager::SetViewportChanged();
// Since clear operations use the source rectangle, we have to do
// regular renders (glClear clears the entire buffer)
if (bpmem.blendmode.colorupdate || bpmem.blendmode.alphaupdate || bpmem.zmode.updateenable)
{
GLbitfield bits = 0;
if (bpmem.blendmode.colorupdate || bpmem.blendmode.alphaupdate)
{
u32 clearColor = (bpmem.clearcolorAR << 16) | bpmem.clearcolorGB;
glClearColor(((clearColor>>16) & 0xff)*(1/255.0f),
((clearColor>>8 ) & 0xff)*(1/255.0f),
((clearColor>>0 ) & 0xff)*(1/255.0f),
((clearColor>>24) & 0xff)*(1/255.0f));
bits |= GL_COLOR_BUFFER_BIT;
}
if (bpmem.zmode.updateenable)
{
glClearDepth((float)(bpmem.clearZValue & 0xFFFFFF) / float(0xFFFFFF));
bits |= GL_DEPTH_BUFFER_BIT;
}
if (bRestoreZBufferTarget)
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); // don't clear ztarget here
glClear(bits);
}
// Have to clear the target zbuffer
if (bpmem.zmode.updateenable && bRestoreZBufferTarget)
// Since clear operations use the source rectangle, we have to do
// regular renders (glClear clears the entire buffer)
if (bpmem.blendmode.colorupdate || bpmem.blendmode.alphaupdate || bpmem.zmode.updateenable)
{
GLbitfield bits = 0;
if (bpmem.blendmode.colorupdate || bpmem.blendmode.alphaupdate)
{
glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT);
GL_REPORT_ERRORD();
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
// red should probably be the LSB
glClearColor(((bpmem.clearZValue>>0)&0xff)*(1/255.0f),
((bpmem.clearZValue>>8)&0xff)*(1/255.0f),
((bpmem.clearZValue>>16)&0xff)*(1/255.0f), 0);
glClear(GL_COLOR_BUFFER_BIT);
Renderer::SetColorMask();
GL_REPORT_ERRORD();
u32 clearColor = (bpmem.clearcolorAR << 16) | bpmem.clearcolorGB;
glClearColor(((clearColor>>16) & 0xff)*(1/255.0f),
((clearColor>>8 ) & 0xff)*(1/255.0f),
((clearColor>>0 ) & 0xff)*(1/255.0f),
((clearColor>>24) & 0xff)*(1/255.0f));
bits |= GL_COLOR_BUFFER_BIT;
}
if (bRestoreZBufferTarget)
if (bpmem.zmode.updateenable)
{
// restore target
GLenum s_drawbuffers[2] = {GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT};
glDrawBuffers(2, s_drawbuffers);
glClearDepth((float)(bpmem.clearZValue & 0xFFFFFF) / float(0xFFFFFF));
bits |= GL_DEPTH_BUFFER_BIT;
}
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
glClear(bits);
}
}
void RestoreRenderState(const Bypass &bp)
@ -256,4 +223,4 @@ void SetInterlacingMode(const Bypass &bp)
{
// TODO
}
};
};

View File

@ -36,6 +36,7 @@
static int s_nMaxPixelInstructions;
static GLuint s_ColorMatrixProgram = 0;
static GLuint s_DepthMatrixProgram = 0;
PixelShaderCache::PSCache PixelShaderCache::pshaders;
PIXELSHADERUID PixelShaderCache::s_curuid;
bool PixelShaderCache::s_displayCompileAlert;
@ -86,12 +87,42 @@ void PixelShaderCache::Init()
glDeleteProgramsARB(1, &s_ColorMatrixProgram);
s_ColorMatrixProgram = 0;
}
sprintf(pmatrixprog, "!!ARBfp1.0"
"TEMP R0;\n"
"TEMP R1;\n"
"TEMP R2;\n"
"PARAM K0 = { 65536.0, 256.0 };\n"
"TEX R2, fragment.texcoord[0], texture[0], RECT;\n"
"MUL R0.x, R2.x, K0.x;\n"
"FRC R0.x, R0.x;\n"
"MUL R0.y, R2.x, K0.y;\n"
"FRC R0.y, R0.y;\n"
"MOV R0.z, R2.x;\n"
"DP4 R1.x, R0, program.env[%d];\n"
"DP4 R1.y, R0, program.env[%d];\n"
"DP4 R1.z, R0, program.env[%d];\n"
"DP4 R1.w, R0, program.env[%d];\n"
"ADD result.color, R1, program.env[%d];\n"
"END\n", C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+2, C_COLORMATRIX+3, C_COLORMATRIX+4);
glGenProgramsARB(1, &s_DepthMatrixProgram);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, s_DepthMatrixProgram);
glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pmatrixprog), pmatrixprog);
err = GL_REPORT_ERROR();
if (err != GL_NO_ERROR) {
ERROR_LOG(VIDEO, "Failed to create depth matrix fragment program");
glDeleteProgramsARB(1, &s_DepthMatrixProgram);
s_DepthMatrixProgram = 0;
}
}
void PixelShaderCache::Shutdown()
{
glDeleteProgramsARB(1, &s_ColorMatrixProgram);
s_ColorMatrixProgram = 0;
glDeleteProgramsARB(1, &s_DepthMatrixProgram);
s_DepthMatrixProgram = 0;
PSCache::iterator iter = pshaders.begin();
for (; iter != pshaders.end(); iter++)
iter->second.Destroy();
@ -103,15 +134,18 @@ GLuint PixelShaderCache::GetColorMatrixProgram()
return s_ColorMatrixProgram;
}
// Returns the GL id of the depth-matrix ARB fragment program.
// The id is 0 when no program is available: that is the initial value, and it
// is reset to 0 if loading the program fails in Init() and again in Shutdown().
GLuint PixelShaderCache::GetDepthMatrixProgram()
{
return s_DepthMatrixProgram;
}
FRAGMENTSHADER* PixelShaderCache::GetShader(bool dstAlphaEnable)
{
DVSTARTPROFILE();
PIXELSHADERUID uid;
u32 zbufrender = (Renderer::UseFakeZTarget() && bpmem.zmode.updateenable) ? 1 : 0;
u32 zBufRenderToCol0 = Renderer::GetRenderMode() != Renderer::RM_Normal;
u32 dstAlpha = dstAlphaEnable ? 1 : 0;
GetPixelShaderId(uid, PixelShaderManager::GetTextureMask(), zbufrender, zBufRenderToCol0, dstAlpha);
GetPixelShaderId(uid, PixelShaderManager::GetTextureMask(), dstAlpha);
PSCache::iterator iter = pshaders.find(uid);
@ -127,8 +161,6 @@ FRAGMENTSHADER* PixelShaderCache::GetShader(bool dstAlphaEnable)
PSCacheEntry& newentry = pshaders[uid];
const char *code = GeneratePixelShader(PixelShaderManager::GetTextureMask(),
Renderer::UseFakeZTarget(),
Renderer::GetRenderMode() != Renderer::RM_Normal,
dstAlphaEnable);
#if defined(_DEBUG) || defined(DEBUGFAST)

View File

@ -65,6 +65,8 @@ public:
static bool CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrprogram);
static GLuint GetColorMatrixProgram();
static GLuint GetDepthMatrixProgram();
};
#endif // _PIXELSHADERCACHE_H_

View File

@ -81,27 +81,27 @@ static bool s_bAVIDumping = false;
static FILE* f_pFrameDump;
#endif
static int nZBufferRender = 0; // if > 0, then use zbuffer render, and count down.
// 1 for no MSAA. Use s_MSAASamples > 1 to check for MSAA.
static int s_MSAASamples = 1;
static int s_MSAACoverageSamples = 0;
// Normal Mode
//
// By default the depth target is used.
// If there is an error creating and attaching it, a depth buffer will be used instead.
//
// s_RenderTarget is a texture_rect
// s_DepthTarget is a Z renderbuffer
// s_FakeZTarget is a texture_rect
// s_DepthTarget is a texture_rect
// s_DepthBuffer is a Z renderbuffer
// MSAA mode
// s_uFramebuffer is a FBO
// s_RenderTarget is a MSAA renderbuffer
// s_FakeZBufferTarget is a MSAA renderbuffer
// s_DepthTarget is a real MSAA z/stencilbuffer
// s_DepthTarget is a MSAA renderbuffer
//
// s_ResolvedFramebuffer is a FBO
// s_ResolvedColorTarget is a texture
// s_ResolvedFakeZTarget is a texture
// s_ResolvedDepthTarget is a Z renderbuffer
// s_ResolvedRenderTarget is a texture
// s_ResolvedDepthTarget is a texture
// A framebuffer is a set of render targets: a color and a z buffer. They can be either RenderBuffers or Textures.
static GLuint s_uFramebuffer = 0;
@ -110,11 +110,10 @@ static GLuint s_uResolvedFramebuffer = 0;
// The size of these should be a (not necessarily even) multiple of the EFB size, 640x528, but isn't.
// These are all texture IDs. Bind them as rect arb textures.
static GLuint s_RenderTarget = 0;
static GLuint s_FakeZTarget = 0;
static GLuint s_DepthTarget = 0;
static GLuint s_DepthBuffer = 0;
static GLuint s_ResolvedRenderTarget = 0;
static GLuint s_ResolvedFakeZTarget = 0;
static GLuint s_ResolvedDepthTarget = 0;
static bool s_bATIDrawBuffers = false;
@ -128,8 +127,6 @@ static volatile bool s_bScreenshot = false;
static Common::CriticalSection s_criticalScreenshot;
static std::string s_sScreenshotName;
static Renderer::RenderMode s_RenderMode = Renderer::RM_Normal;
int frameCount;
static int s_fps = 0;
@ -328,42 +325,35 @@ bool Renderer::Init()
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, s_targetwidth, s_targetheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
SetDefaultRectTexParams();
GLint nMaxMRT = 0;
glGetIntegerv(GL_MAX_COLOR_ATTACHMENTS_EXT, &nMaxMRT);
if (nMaxMRT > 1)
{
// There's MRT support. Create a color texture image to use as secondary render target.
// We use MRT to render Z into this one, for various purposes (mostly copy Z to texture).
glGenTextures(1, (GLuint *)&s_FakeZTarget);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_FakeZTarget);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, s_targetwidth, s_targetheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
SetDefaultRectTexParams();
}
// Create the real depth/stencil buffer. It's a renderbuffer, not a texture.
glGenRenderbuffersEXT(1, &s_DepthTarget);
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, s_DepthTarget);
glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH24_STENCIL8_EXT, s_targetwidth, s_targetheight);
// Create the depth target texture
glGenTextures(1, &s_DepthTarget);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_DepthTarget);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_DEPTH_COMPONENT24, s_targetwidth, s_targetheight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL);
SetDefaultRectTexParams();
// Our framebuffer object is still bound here. Attach the two render targets, color and Z/stencil, to the framebuffer object.
// Our framebuffer object is still bound here. Attach the two render targets, color and depth, to the framebuffer object.
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, s_RenderTarget, 0);
glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, s_DepthTarget);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_TEXTURE_RECTANGLE_ARB, s_DepthTarget, 0);
GL_REPORT_FBO_ERROR();
if (s_FakeZTarget != 0) {
// We do a simple test to make sure that MRT works. I don't really know why - this is probably a workaround for
// some terribly buggy ancient driver.
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_RECTANGLE_ARB, s_FakeZTarget, 0);
bool bFailed = glGetError() != GL_NO_ERROR || glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE_EXT;
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_RECTANGLE_ARB, 0, 0);
if (bFailed) {
glDeleteTextures(1, (GLuint *)&s_FakeZTarget);
s_FakeZTarget = 0;
}
}
bool bFailed = glGetError() != GL_NO_ERROR || glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE_EXT;
if (s_FakeZTarget == 0)
ERROR_LOG(VIDEO, "Disabling ztarget MRT feature (max MRT = %d)", nMaxMRT);
// Check that the FBO is attached. If there is an error revert to a depth buffer.
if (bFailed) {
ERROR_LOG(VIDEO, "Disabling ztarget feature");
// detach and delete depth texture
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_TEXTURE_RECTANGLE_ARB, 0, 0);
glDeleteTextures(1, (GLuint *)&s_DepthTarget);
s_DepthTarget = 0;
// create and attach depth buffer
glGenRenderbuffersEXT(1, (GLuint *)&s_DepthBuffer);
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, s_DepthBuffer);
glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH24_STENCIL8_EXT, s_targetwidth, s_targetheight);
glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, s_DepthBuffer);
GL_REPORT_FBO_ERROR();
}
}
else
{
@ -376,25 +366,17 @@ bool Renderer::Init()
} else {
glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, s_MSAASamples, GL_RGBA, s_targetwidth, s_targetheight);
}
glGenRenderbuffersEXT(1, &s_FakeZTarget);
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, s_FakeZTarget);
if (s_MSAACoverageSamples) {
glRenderbufferStorageMultisampleCoverageNV(GL_RENDERBUFFER_EXT, s_MSAACoverageSamples, s_MSAASamples, GL_RGBA, s_targetwidth, s_targetheight);
} else {
glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, s_MSAASamples, GL_RGBA, s_targetwidth, s_targetheight);
}
glGenRenderbuffersEXT(1, &s_DepthTarget);
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, s_DepthTarget);
if (s_MSAACoverageSamples) {
glRenderbufferStorageMultisampleCoverageNV(GL_RENDERBUFFER_EXT, s_MSAACoverageSamples, s_MSAASamples, GL_DEPTH24_STENCIL8_EXT, s_targetwidth, s_targetheight);
glRenderbufferStorageMultisampleCoverageNV(GL_RENDERBUFFER_EXT, s_MSAACoverageSamples, s_MSAASamples, GL_DEPTH_COMPONENT24, s_targetwidth, s_targetheight);
} else {
glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, s_MSAASamples, GL_DEPTH24_STENCIL8_EXT, s_targetwidth, s_targetheight);
glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, s_MSAASamples, GL_DEPTH_COMPONENT24, s_targetwidth, s_targetheight);
}
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, 0);
// Attach them to our multisampled FBO. The multisampled FBO is still bound here.
glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_EXT, s_RenderTarget);
glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_RENDERBUFFER_EXT, s_FakeZTarget);
glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, s_DepthTarget);
GL_REPORT_FBO_ERROR();
@ -411,25 +393,23 @@ bool Renderer::Init()
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, s_targetwidth, s_targetheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
SetDefaultRectTexParams();
// Generate the resolve targets.
glGenTextures(1, (GLuint *)&s_ResolvedFakeZTarget);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_ResolvedFakeZTarget);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, s_targetwidth, s_targetheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glGenTextures(1, (GLuint *)&s_ResolvedDepthTarget);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_ResolvedDepthTarget);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_DEPTH_COMPONENT, s_targetwidth, s_targetheight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL);
SetDefaultRectTexParams();
// Create the real depth/stencil buffer. It's a renderbuffer, not a texture.
glGenRenderbuffersEXT(1, &s_ResolvedDepthTarget);
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, s_ResolvedDepthTarget);
glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH24_STENCIL8_EXT, s_targetwidth, s_targetheight);
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, 0);
// Attach our resolve targets to our resolved FBO.
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, s_ResolvedRenderTarget, 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_RECTANGLE_ARB, s_ResolvedFakeZTarget, 0);
glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, s_ResolvedDepthTarget);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_TEXTURE_RECTANGLE_ARB, s_ResolvedDepthTarget, 0);
GL_REPORT_FBO_ERROR();
bFailed = glGetError() != GL_NO_ERROR || glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE_EXT;
if (bFailed) PanicAlert("Incomplete rt2");
if (bFailed) {
ERROR_LOG(VIDEO, "AA rendering init failed.");
}
}
if (GL_REPORT_ERROR() != GL_NO_ERROR)
@ -438,8 +418,6 @@ bool Renderer::Init()
// glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, s_uFramebuffer);
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
nZBufferRender = 0; // Initialize the Z render shutoff countdown. We only render Z if it's desired, to save GPU power.
if (GL_REPORT_ERROR() != GL_NO_ERROR)
bSuccess = false;
@ -477,7 +455,6 @@ bool Renderer::Init()
cgGLSetDebugMode(GL_FALSE);
#endif
s_RenderMode = Renderer::RM_Normal;
if (!InitializeGL())
return false;
@ -592,12 +569,6 @@ void Renderer::SetRenderTarget(GLuint targ)
targ != 0 ? targ : s_RenderTarget, 0);
}
// Attaches a renderbuffer as the depth attachment of the framebuffer bound to
// GL_FRAMEBUFFER_EXT.
//   targ - renderbuffer id to attach; 0 selects the default s_DepthTarget.
void Renderer::SetDepthTarget(GLuint targ)
{
glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT,
targ != 0 ? targ : s_DepthTarget);
}
void Renderer::SetFramebuffer(GLuint fb)
{
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb != 0 ? fb : s_uFramebuffer);
@ -628,38 +599,39 @@ GLuint Renderer::ResolveAndGetRenderTarget(const TRectangle &source_rect)
}
}
GLuint Renderer::ResolveAndGetFakeZTarget(const TRectangle &source_rect)
GLuint Renderer::ResolveAndGetDepthTarget(const TRectangle &source_rect)
{
// This logic should be moved elsewhere.
if (s_MSAASamples > 1)
{
// Flip the rectangle
TRectangle flipped_rect;
source_rect.FlipYPosition(GetTargetHeight(), &flipped_rect);
//source_rect.FlipYPosition(GetTargetHeight(), &flipped_rect);
// donkopunchstania - some bug causes the offsets to be ignored. driver bug?
flipped_rect.top = 0;
flipped_rect.bottom = GetTargetHeight();
flipped_rect.left = 0;
flipped_rect.right = GetTargetWidth();
flipped_rect.Clamp(0, 0, GetTargetWidth(), GetTargetHeight());
// Do the resolve. We resolve both color channels, not very necessary.
// Do the resolve.
glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, s_uFramebuffer);
glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, s_uResolvedFramebuffer);
glBlitFramebufferEXT(flipped_rect.left, flipped_rect.top, flipped_rect.right, flipped_rect.bottom,
flipped_rect.left, flipped_rect.top, flipped_rect.right, flipped_rect.bottom,
GL_COLOR_BUFFER_BIT, GL_NEAREST);
GL_DEPTH_BUFFER_BIT, GL_NEAREST);
// Return the resolved target.
return s_ResolvedFakeZTarget;
return s_ResolvedDepthTarget;
}
else
{
return s_FakeZTarget;
return s_DepthTarget;
}
}
// Reports whether Z rendering through the fake Z target is currently active,
// i.e. whether the nZBufferRender frame countdown is still above zero.
bool Renderer::UseFakeZTarget()
{
// This logic should be moved elsewhere.
return nZBufferRender > 0;
}
void Renderer::ResetGLState()
{
// Gets us to a reasonably sane state where it's possible to do things like
@ -801,151 +773,6 @@ bool Renderer::IsUsingATIDrawBuffers()
return s_bATIDrawBuffers;
}
// Returns the s_bHaveStencilBuffer flag.
// NOTE(review): the flag is assigned outside the code visible here; presumably
// it records whether a stencil buffer was obtained at init - confirm.
bool Renderer::HaveStencilBuffer()
{
return s_bHaveStencilBuffer;
}
// Turns on Z rendering through multiple render targets (MRT):
//  - restarts the nZBufferRender countdown at 10,
//  - selects color attachments 0 and 1 as the draw buffers,
//  - attaches s_FakeZTarget as color attachment 1 of the bound framebuffer.
void Renderer::SetZBufferRender()
{
nZBufferRender = 10; // The game asked for Z. Give it 10 frames, then turn it off for speed.
GLenum s_drawbuffers[2] = {
GL_COLOR_ATTACHMENT0_EXT,
GL_COLOR_ATTACHMENT1_EXT
};
glDrawBuffers(2, s_drawbuffers);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_RECTANGLE_ARB, s_FakeZTarget, 0);
// The framebuffer must still be complete with the second color attachment in place.
_assert_(glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) == GL_FRAMEBUFFER_COMPLETE_EXT);
}
// Does this function even work correctly???
//
// Draws s_FakeZTarget as a textured quad onto the default render target with
// only the alpha channel write-enabled. The stencil function is set to pass
// only where the stencil value equals 1, which is the value the stencil setup
// in SetRenderMode writes for every pixel rendered in RM_ZBufferAlpha mode.
// GL state is reset on entry and restored on exit.
void Renderer::FlushZBufferAlphaToTarget()
{
ResetGLState();
// Render into the default color target (0 selects s_RenderTarget).
SetRenderTarget(0);
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
// Write alpha only; RGB of the target is left untouched.
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_TRUE);
glViewport(0, 0, GetTargetWidth(), GetTargetHeight());
// disable all other stages
for (int i = 1; i < 8; ++i)
TextureMngr::DisableStage(i);
// bind the fake Z target (s_FakeZTarget) on texture unit 0 as the source drawn onto the main buffer
glActiveTexture(GL_TEXTURE0);
glEnable(GL_TEXTURE_RECTANGLE_ARB);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_FakeZTarget);
GL_REPORT_ERRORD();
// setup the stencil to only accept pixels that have been written
glStencilFunc(GL_EQUAL, 1, 0xff);
glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
// TODO: This code should not have to bother with stretchtofit checking -
// all necessary scale initialization should be done elsewhere.
if (s_bNativeResolution)
{
//TODO: Do Correctly in a bit
// Ratio of a 640x480 image to the backbuffer, normalised so the larger of
// the two ratios becomes 1; the quad then spans +/-FactorW by +/-FactorH.
float FactorW = 640.f / (float)OpenGL_GetBackbufferWidth();
float FactorH = 480.f / (float)OpenGL_GetBackbufferHeight();
float Max = (FactorW < FactorH) ? FactorH : FactorW;
float Temp = 1.0f / Max;
FactorW *= Temp;
FactorH *= Temp;
// Texture coordinates are in texels (rectangle texture), not 0..1.
glBegin(GL_QUADS);
glTexCoord2f(0, 0); glVertex2f(-FactorW,-FactorH);
glTexCoord2f(0, (float)GetTargetHeight()); glVertex2f(-FactorW,FactorH);
glTexCoord2f((float)GetTargetWidth(), (float)GetTargetHeight()); glVertex2f(FactorW,FactorH);
glTexCoord2f((float)GetTargetWidth(), 0); glVertex2f(FactorW,-FactorH);
glEnd();
}
else
{
// Full-viewport quad covering the whole target.
glBegin(GL_QUADS);
glTexCoord2f(0, 0); glVertex2f(-1,-1);
glTexCoord2f(0, (float)(GetTargetHeight())); glVertex2f(-1,1);
glTexCoord2f((float)(GetTargetWidth()), (float)(GetTargetHeight())); glVertex2f(1,1);
glTexCoord2f((float)(GetTargetWidth()), 0); glVertex2f(1,-1);
glEnd();
}
GL_REPORT_ERRORD();
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
RestoreGLState();
}
// Switches the renderer between RM_Normal, RM_ZBufferOnly and RM_ZBufferAlpha
// and reconfigures draw buffers, color mask and stencil state for the new mode.
// Does nothing when the requested mode is already active.
//
// Transitions handled:
//   any mode      -> RM_Normal       : flush alpha if leaving RM_ZBufferAlpha, restore
//                                      color mask and render target, re-enable MRT Z
//   RM_Normal     -> Z mode          : draw into color attachment 1 with all channels
//                                      enabled (plus stencil marking for RM_ZBufferAlpha)
//   ZBufferAlpha  -> RM_ZBufferOnly  : flush alpha, drop stencil, render into s_FakeZTarget
//   ZBufferOnly   -> RM_ZBufferAlpha : start stencil marking
void Renderer::SetRenderMode(RenderMode mode)
{
// RM_ZBufferAlpha relies on the stencil buffer; without one fall back to Z only.
if (!s_bHaveStencilBuffer && mode == RM_ZBufferAlpha)
mode = RM_ZBufferOnly;
if (s_RenderMode == mode)
return;
if (mode == RM_Normal) {
// flush buffers
if (s_RenderMode == RM_ZBufferAlpha) {
FlushZBufferAlphaToTarget();
glDisable(GL_STENCIL_TEST);
}
SetColorMask();
SetRenderTarget(0);
SetZBufferRender();
GL_REPORT_ERRORD();
}
else if (s_RenderMode == RM_Normal) {
// setup buffers
_assert_(UseFakeZTarget() && bpmem.zmode.updateenable);
if (mode == RM_ZBufferAlpha) {
// Clear the stencil to 0 and write 1 for every pixel that passes from now on.
glEnable(GL_STENCIL_TEST);
glClearStencil(0);
glClear(GL_STENCIL_BUFFER_BIT);
glStencilFunc(GL_ALWAYS, 1, 0xff);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
}
glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT);
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
GL_REPORT_ERRORD();
}
else {
// Switching between the two Z modes; neither the old nor the new mode is RM_Normal.
_assert_(UseFakeZTarget());
_assert_(s_bHaveStencilBuffer);
if (mode == RM_ZBufferOnly) {
// flush and remove stencil
_assert_(s_RenderMode == RM_ZBufferAlpha);
FlushZBufferAlphaToTarget();
glDisable(GL_STENCIL_TEST);
SetRenderTarget(s_FakeZTarget);
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
GL_REPORT_ERRORD();
}
else {
_assert_(mode == RM_ZBufferAlpha && s_RenderMode == RM_ZBufferOnly);
// setup stencil
glEnable(GL_STENCIL_TEST);
glClearStencil(0);
glClear(GL_STENCIL_BUFFER_BIT);
glStencilFunc(GL_ALWAYS, 1, 0xff);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
}
}
s_RenderMode = mode;
}
// Returns the currently stored render mode (s_RenderMode).
Renderer::RenderMode Renderer::GetRenderMode()
{
return s_RenderMode;
}
void ComputeBackbufferRectangle(TRectangle *rc)
{
float FloatGLWidth = (float)OpenGL_GetBackbufferWidth();
@ -1021,8 +848,6 @@ void Renderer::Swap(const TRectangle& rc)
OpenGL_Update(); // just updates the render window position and the backbuffer size
DVSTARTPROFILE();
Renderer::SetRenderMode(Renderer::RM_Normal);
ResetGLState();
TRectangle back_rc;
@ -1412,17 +1237,6 @@ void Renderer::SwapBuffers()
// Render to the framebuffer.
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, s_uFramebuffer);
if (nZBufferRender > 0)
{
if (--nZBufferRender == 0)
{
// turn off
nZBufferRender = 0;
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_RECTANGLE_ARB, 0, 0);
Renderer::SetRenderMode(RM_Normal); // turn off any zwrites
}
}
GL_REPORT_ERRORD();
}

View File

@ -41,19 +41,9 @@ extern int frameCount;
class Renderer
{
private:
static void FlushZBufferAlphaToTarget();
public:
// Render modes selectable through SetRenderMode(); they describe which
// buffer is bound as which color attachment.
enum RenderMode
{
RM_Normal=0, // normal target as color0, ztarget as color1
RM_ZBufferOnly, // zbuffer as color0
RM_ZBufferAlpha, // zbuffer as color0; alpha info is also dumped to the regular target once the mode is switched
// (the stencil buffer is used to indicate which pixels were written)
};
static bool Init();
static bool Init();
static void Shutdown();
// initialize opengl standard values (like viewport)
@ -65,15 +55,11 @@ public:
static void SwapBuffers();
static bool IsUsingATIDrawBuffers();
static bool HaveStencilBuffer();
static void SetColorMask();
static void SetBlendMode(bool forceUpdate);
static bool SetScissorRect();
static void SetRenderMode(RenderMode mode);
static RenderMode GetRenderMode();
// Render target management
static int GetTargetWidth();
static int GetTargetHeight();
@ -83,9 +69,7 @@ public:
static float GetTargetScaleY();
static void SetFramebuffer(GLuint fb);
static void SetZBufferRender(); // sets rendering of the zbuffer using MRTs
static void SetRenderTarget(GLuint targ); // if targ is 0, sets to original render target
static void SetDepthTarget(GLuint targ);
// If in MSAA mode, this will perform a resolve of the specified rectangle, and return the resolve target as a texture ID.
// Thus, this call may be expensive. Don't repeat it unnecessarily.
@ -93,10 +77,9 @@ public:
// After calling this, before you render anything else, you MUST bind the framebuffer you want to draw to.
static GLuint ResolveAndGetRenderTarget(const TRectangle &rect);
// Same as above but for the FakeZ Target.
// Same as above but for the depth Target.
// After calling this, before you render anything else, you MUST bind the framebuffer you want to draw to.
static GLuint ResolveAndGetFakeZTarget(const TRectangle &rect);
static bool UseFakeZTarget(); // This is used by some functions to check for Z target existence.
static GLuint ResolveAndGetDepthTarget(const TRectangle &rect);
// Random utilities
static void RenderText(const char* pstr, int left, int top, u32 color);

View File

@ -165,6 +165,51 @@ void WriteSwizzler(char*& p, u32 format)
" sampleUv.y = sampleUv.y + textureDims.w;\n");
}
// Emits the common prologue of an encoding fragment program for 32 bit
// texture formats: the uniform declarations, the main() header and the code
// that computes sampleUv, the source coordinate to read for the output texel.
//
// Shader uniforms:
//   block dimensions : widthStride, heightStride
//   texture dims     : width, height, x offset, y offset
//
// p      - write cursor into the shader text buffer; advanced past the emitted text.
// format - texture format whose block width/height (in texels) drive the swizzle.
//
// The emitted main() is left open: the caller appends the format specific
// body and the closing brace. The shader locals xb and halfxb stay in scope
// for that body.
void Write32BitSwizzler(char*& p, u32 format)
{
	WRITE(p, "uniform float4 blkDims : register(c%d);\n", C_COLORMATRIX);
	WRITE(p, "uniform float4 textureDims : register(c%d);\n", C_COLORMATRIX + 1);

	const float blkW = (float)GetBlockWidthInTexels(format);
	const float blkH = (float)GetBlockHeightInTexels(format);

	// 32 bit textures (RGBA8 and Z24) are stored in 2 cache line increments
	WRITE(p,
		"uniform samplerRECT samp0 : register(s0);\n"
		"void main(\n"
		" out float4 ocol0 : COLOR0,\n"
		" in float2 uv0 : TEXCOORD0)\n"
		"{\n"
		" float2 sampleUv;\n"
		" float2 uv1 = floor(uv0);\n");

	WRITE(p, " float yl = floor(uv1.y / %f);\n", blkH);
	WRITE(p, " float yb = yl * %f;\n", blkH);
	WRITE(p, " float yoff = uv1.y - yb;\n");
	WRITE(p, " float xp = uv1.x + (yoff * textureDims.x);\n");
	WRITE(p, " float xel = floor(xp / 2);\n");
	WRITE(p, " float xb = floor(xel / %f);\n", blkH);
	WRITE(p, " float xoff = xel - (xb * %f);\n", blkH);
	WRITE(p, " float x2 = uv1.x * 2;\n");
	WRITE(p, " float xl = floor(x2 / %f);\n", blkW);
	WRITE(p, " float xib = x2 - (xl * %f);\n", blkW);
	WRITE(p, " float halfxb = floor(xb / 2);\n");
	WRITE(p, " sampleUv.x = xib + (halfxb * %f);\n", blkW);
	WRITE(p, " sampleUv.y = yb + xoff;\n");

	// Scale by the block strides, flip vertically, then apply the x/y offset.
	WRITE(p, " sampleUv = sampleUv * blkDims.xy;\n");
	WRITE(p, " sampleUv.y = textureDims.y - sampleUv.y;\n");
	WRITE(p, " sampleUv.x = sampleUv.x + textureDims.z;\n");
	WRITE(p, " sampleUv.y = sampleUv.y + textureDims.w;\n");
}
void WriteSampleColor(char*& p, const char* colorComp, const char* dest)
{
WRITE(p, " %s = texRECT(samp0, sampleUv).%s;\n", dest, colorComp);
@ -432,48 +477,9 @@ void WriteRGBA4443Encoder(char* p)
WRITE(p, "}\n");
}
// block dimensions : widthStride, heightStride
// texture dims : width, height, x offset, y offset
void WriteRGBA8Encoder(char* p, bool fromDepth)
void WriteRGBA8Encoder(char* p)
{
WRITE(p, "uniform float4 blkDims : register(c%d);\n", C_COLORMATRIX);
WRITE(p, "uniform float4 textureDims : register(c%d);\n", C_COLORMATRIX + 1);
float blkW = (float)GetBlockWidthInTexels(GX_TF_RGBA8);
float blkH = (float)GetBlockHeightInTexels(GX_TF_RGBA8);
float samples = (float)GetEncodedSampleCount(GX_TF_RGBA8);
// Swizzling for RGBA8 format
WRITE(p,
"uniform samplerRECT samp0 : register(s0);\n"
"void main(\n"
" out float4 ocol0 : COLOR0,\n"
" in float2 uv0 : TEXCOORD0)\n"
"{\n"
" float2 sampleUv;\n"
" float2 uv1 = floor(uv0);\n");
WRITE(p, " float yl = floor(uv1.y / %f);\n", blkH);
WRITE(p, " float yb = yl * %f;\n", blkH);
WRITE(p, " float yoff = uv1.y - yb;\n");
WRITE(p, " float xp = uv1.x + (yoff * textureDims.x);\n");
WRITE(p, " float xel = floor(xp / 2);\n");
WRITE(p, " float xb = floor(xel / %f);\n", blkH);
WRITE(p, " float xoff = xel - (xb * %f);\n", blkH);
WRITE(p, " float x2 = uv1.x * 2;\n");
WRITE(p, " float xl = floor(x2 / %f);\n", blkW);
WRITE(p, " float xib = x2 - (xl * %f);\n", blkW);
WRITE(p, " float halfxb = floor(xb / 2);\n");
WRITE(p, " sampleUv.x = xib + (halfxb * %f);\n", blkW);
WRITE(p, " sampleUv.y = yb + xoff;\n");
WRITE(p, " sampleUv = sampleUv * blkDims.xy;\n");
WRITE(p, " sampleUv.y = textureDims.y - sampleUv.y;\n");
WRITE(p, " sampleUv.x = sampleUv.x + textureDims.z;\n");
WRITE(p, " sampleUv.y = sampleUv.y + textureDims.w;\n");
Write32BitSwizzler(p, GX_TF_RGBA8);
WRITE(p, " float cl1 = xb - (halfxb * 2);\n");
WRITE(p, " float cl0 = 1.0f - cl1;\n");
@ -483,10 +489,7 @@ void WriteRGBA8Encoder(char* p, bool fromDepth)
WRITE(p, " float4 color1;\n");
WriteSampleColor(p, "rgba", "texSample");
if(fromDepth)
WRITE(p, " color0.b = 1.0f;\n");
else
WRITE(p, " color0.b = texSample.a;\n");
WRITE(p, " color0.b = texSample.a;\n");
WRITE(p, " color0.g = texSample.r;\n");
WRITE(p, " color1.b = texSample.g;\n");
WRITE(p, " color1.g = texSample.b;\n");
@ -494,10 +497,7 @@ void WriteRGBA8Encoder(char* p, bool fromDepth)
WriteIncrementSampleX(p);
WriteSampleColor(p, "rgba", "texSample");
if(fromDepth)
WRITE(p, " color0.r = 1.0f;\n");
else
WRITE(p, " color0.r = texSample.a;\n");
WRITE(p, " color0.r = texSample.a;\n");
WRITE(p, " color0.a = texSample.r;\n");
WRITE(p, " color1.r = texSample.g;\n");
WRITE(p, " color1.a = texSample.b;\n");
@ -606,6 +606,101 @@ void WriteCC8Encoder(char* p, const char* comp)
WRITE(p, "}\n");
}
// Emits the encoding fragment program for 8 bit depth copies.
//
// Four consecutive source texels are packed into one output pixel, in the
// channel order b, g, r, a. For each texel the blue channel of samp0 is read
// as depth and frac(depth * multiplier) is stored.
//
// p          - write cursor into the shader text buffer.
// multiplier - shader literal the depth is scaled by before frac() is taken.
void WriteZ8Encoder(char* p, const char* multiplier)
{
	static const char* const outChannel[4] = { "b", "g", "r", "a" };

	WriteSwizzler(p, GX_CTF_Z8M);
	WRITE(p, " float depth;\n");

	for (int texel = 0; texel < 4; ++texel)
	{
		// Step to the next source texel between samples, not before the first one.
		if (texel != 0)
			WriteIncrementSampleX(p);

		WriteSampleColor(p, "b", "depth");
		WRITE(p, "ocol0.%s = frac(depth * %s);\n", outChannel[texel], multiplier);
	}

	WRITE(p, "}\n");
}
// Emits the encoding fragment program for GX_TF_Z16 depth copies.
//
// Two consecutive source texels are packed into one output pixel: the first
// fills ocol0.b/ocol0.g, the second ocol0.r/ocol0.a. For each texel the blue
// channel of samp0 is read as depth; one channel receives frac(depth * 256)
// and the other the unscaled depth.
void WriteZ16Encoder(char* p)
{
	// byte order is reversed
	static const char* const fracChannel[2] = { "b", "r" };
	static const char* const rawChannel[2]  = { "g", "a" };

	WriteSwizzler(p, GX_TF_Z16);
	WRITE(p, " float depth;\n");

	for (int texel = 0; texel < 2; ++texel)
	{
		if (texel != 0)
			WriteIncrementSampleX(p);

		WriteSampleColor(p, "b", "depth");
		WRITE(p, " ocol0.%s = frac(depth * 256.0f);\n", fracChannel[texel]);
		WRITE(p, " ocol0.%s = depth;\n", rawChannel[texel]);
	}

	WRITE(p, "}\n");
}
// Emits the encoding fragment program for GX_CTF_Z16L depth copies.
//
// Two consecutive source texels are packed into one output pixel: the first
// fills ocol0.b/ocol0.g, the second ocol0.r/ocol0.a. For each texel the blue
// channel of samp0 is read as depth; one channel receives frac(depth * 65536)
// and the other frac(depth * 256).
void WriteZ16LEncoder(char* p)
{
	// byte order is reversed
	static const char* const lowChannel[2]  = { "b", "r" };
	static const char* const highChannel[2] = { "g", "a" };

	WriteSwizzler(p, GX_CTF_Z16L);
	WRITE(p, " float depth;\n");

	for (int texel = 0; texel < 2; ++texel)
	{
		if (texel != 0)
			WriteIncrementSampleX(p);

		WriteSampleColor(p, "b", "depth");
		WRITE(p, " ocol0.%s = frac(depth * 65536.0f);\n", lowChannel[texel]);
		WRITE(p, " ocol0.%s = frac(depth * 256.0f);\n", highChannel[texel]);
	}

	WRITE(p, "}\n");
}
// Emits the encoding fragment program for GX_TF_Z24X8 depth copies.
//
// Two consecutive source texels are sampled (blue channel of samp0) into
// depth0 and depth1. Depending on cl, which is derived from the xb/halfxb
// values computed by Write32BitSwizzler, the output pixel holds either the
// upper 16 bits of both texels or their lower 8 bits.
//
// Every emitted assignment is terminated with ';' so the generated shader
// is syntactically valid, and the lower-8 branch takes ocol0.a from depth1,
// mirroring the upper-16 branch where r/a belong to the second texel.
void WriteZ24Encoder(char* p)
{
	Write32BitSwizzler(p, GX_TF_Z24X8);

	WRITE(p, " float cl = xb - (halfxb * 2);\n");

	WRITE(p, " float depth0;\n");
	WRITE(p, " float depth1;\n");

	WriteSampleColor(p, "b", "depth0");
	WriteIncrementSampleX(p);
	WriteSampleColor(p, "b", "depth1");

	WRITE(p, " if(cl > 0.5f) {\n");
	// upper 16
	WRITE(p, " ocol0.b = frac(depth0 * 256.0f);\n");
	WRITE(p, " ocol0.g = depth0;\n");
	WRITE(p, " ocol0.r = frac(depth1 * 256.0f);\n");
	WRITE(p, " ocol0.a = depth1;\n");
	WRITE(p, " } else {\n");
	// lower 8
	WRITE(p, " ocol0.b = 1.0f;\n");
	WRITE(p, " ocol0.g = frac(depth0 * 65536.0f);\n");
	WRITE(p, " ocol0.r = 1.0f;\n");
	WRITE(p, " ocol0.a = frac(depth1 * 65536.0f);\n");
	WRITE(p, " }\n"
		"}\n");
}
const char *GenerateEncodingShader(u32 format)
{
text[sizeof(text) - 1] = 0x7C; // canary
@ -633,7 +728,7 @@ const char *GenerateEncodingShader(u32 format)
WriteRGB5A3Encoder(p);
break;
case GX_TF_RGBA8:
WriteRGBA8Encoder(p, false);
WriteRGBA8Encoder(p);
break;
case GX_CTF_R4:
WriteC4Encoder(p, "r");
@ -666,24 +761,22 @@ const char *GenerateEncodingShader(u32 format)
WriteC8Encoder(p, "b");
break;
case GX_TF_Z16:
// byte order is reversed
WriteCC8Encoder(p, "gb");
WriteZ16Encoder(p);
break;
case GX_TF_Z24X8:
WriteRGBA8Encoder(p, true);
WriteZ24Encoder(p);
break;
case GX_CTF_Z4:
WriteC4Encoder(p, "b");
break;
case GX_CTF_Z8M:
WriteC8Encoder(p, "g");
WriteZ8Encoder(p, "256.0f");
break;
case GX_CTF_Z8L:
WriteC8Encoder(p, "r");
WriteZ8Encoder(p, "65536.0f" );
break;
case GX_CTF_Z16L:
// byte order is reversed
WriteCC8Encoder(p, "rg");
WriteZ16LEncoder(p);
break;
default:
PanicAlert("Unknown texture copy format: 0x%x\n", format);

View File

@ -160,8 +160,6 @@ void Shutdown()
void EncodeToRamUsingShader(FRAGMENTSHADER& shader, GLuint srcTexture, const TRectangle& sourceRc,
u8* destAddr, int dstWidth, int dstHeight, bool linearFilter)
{
Renderer::SetRenderMode(Renderer::RM_Normal);
Renderer::ResetGLState();
// switch to texture converter frame buffer
@ -243,7 +241,7 @@ void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyf
u8 *dest_ptr = Memory_GetPtr(address);
u32 source_texture = bFromZBuffer ? Renderer::ResolveAndGetFakeZTarget(source) : Renderer::ResolveAndGetRenderTarget(source);
u32 source_texture = bFromZBuffer ? Renderer::ResolveAndGetDepthTarget(source) : Renderer::ResolveAndGetRenderTarget(source);
int width = source.right - source.left;
int height = source.bottom - source.top;
@ -288,9 +286,6 @@ void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyf
scaledSource.right = expandedWidth / samples;
EncodeToRamUsingShader(texconv_shader, source_texture, scaledSource, dest_ptr, expandedWidth / samples, expandedHeight, bScaleByHalf);
if (bFromZBuffer)
Renderer::SetZBufferRender(); // notify for future settings
}
void EncodeToRamYUYV(GLuint srcTexture, const TRectangle& sourceRc,
@ -303,7 +298,6 @@ void EncodeToRamYUYV(GLuint srcTexture, const TRectangle& sourceRc,
// Should be scale free.
void DecodeToTexture(u8* srcAddr, int srcWidth, int srcHeight, GLuint destTexture)
{
Renderer::SetRenderMode(Renderer::RM_Normal);
Renderer::ResetGLState();
float srcFormatFactor = 0.5f;

View File

@ -55,7 +55,6 @@
u8 *TextureMngr::temp = NULL;
TextureMngr::TexCache TextureMngr::textures;
std::map<u32, TextureMngr::DEPTHTARGET> TextureMngr::mapDepthTargets;
extern int frameCount;
static u32 s_TempFramebuffer = 0;
@ -176,13 +175,6 @@ void TextureMngr::Shutdown()
{
Invalidate(true);
std::map<u32, DEPTHTARGET>::iterator itdepth = mapDepthTargets.begin();
for (itdepth = mapDepthTargets.begin(); itdepth != mapDepthTargets.end(); ++itdepth)
{
glDeleteRenderbuffersEXT(1, &itdepth->second.targ);
}
mapDepthTargets.clear();
if (s_TempFramebuffer) {
glDeleteFramebuffersEXT(1, (GLuint *)&s_TempFramebuffer);
s_TempFramebuffer = 0;
@ -217,14 +209,6 @@ void TextureMngr::ProgressiveCleanup()
else
iter++;
}
std::map<u32, DEPTHTARGET>::iterator itdepth = mapDepthTargets.begin();
while (itdepth != mapDepthTargets.end())
{
if (frameCount > 20 + itdepth->second.framecount)
ERASE_THROUGH_ITERATOR(mapDepthTargets, itdepth);
else ++itdepth;
}
}
void TextureMngr::InvalidateRange(u32 start_address, u32 size) {
@ -588,8 +572,9 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool
break;
case 3: // Z16 //?
case 11: // Z16
colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1;
case 11: // Z16 (reverse order)
colmat[2] = colmat[6] = colmat[10] = colmat[13] = 1;
break;
case 6: // Z24X8
colmat[0] = 1;
@ -702,9 +687,8 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool
// Make sure to resolve anything we need to read from.
// TODO - it seems that it sometimes doesn't resolve the entire area we are interested in. See shadows in Burnout 2.
GLuint read_texture = bFromZBuffer ? Renderer::ResolveAndGetFakeZTarget(scaled_rect) : Renderer::ResolveAndGetRenderTarget(scaled_rect);
GLuint read_texture = bFromZBuffer ? Renderer::ResolveAndGetDepthTarget(scaled_rect) : Renderer::ResolveAndGetRenderTarget(scaled_rect);
Renderer::SetRenderMode(Renderer::RM_Normal); // set back to normal
GL_REPORT_ERRORD();
// We have to run a pixel shader, for color conversion.
@ -716,29 +700,7 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool
Renderer::SetFramebuffer(s_TempFramebuffer);
Renderer::SetRenderTarget(entry.texture);
GL_REPORT_ERRORD();
// create and attach the render target
std::map<u32, DEPTHTARGET>::iterator itdepth = mapDepthTargets.find((h << 16) | w);
if (itdepth == mapDepthTargets.end())
{
DEPTHTARGET& depth = mapDepthTargets[(h << 16) | w];
depth.framecount = frameCount;
glGenRenderbuffersEXT(1, &depth.targ);
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, depth.targ);
glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH_COMPONENT, w, h);
GL_REPORT_ERRORD();
Renderer::SetDepthTarget(depth.targ);
GL_REPORT_ERRORD();
}
else
{
itdepth->second.framecount = frameCount;
Renderer::SetDepthTarget(itdepth->second.targ);
GL_REPORT_ERRORD();
}
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
glActiveTexture(GL_TEXTURE0);
glEnable(GL_TEXTURE_RECTANGLE_ARB);
@ -747,7 +709,7 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool
glViewport(0, 0, w, h);
glEnable(GL_FRAGMENT_PROGRAM_ARB);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, PixelShaderCache::GetColorMatrixProgram());
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, bFromZBuffer ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram());
PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation
GL_REPORT_ERRORD();
@ -765,9 +727,6 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool
VertexShaderManager::SetViewportChanged();
TextureMngr::DisableStage(0);
if (bFromZBuffer)
Renderer::SetZBufferRender(); // notify for future settings
GL_REPORT_ERRORD();
if (g_Config.bDumpEFBTarget)

View File

@ -57,19 +57,11 @@ public:
bool IntersectsMemoryRange(u32 range_address, u32 range_size);
};
// A cached depth render target together with the frame it was last used in.
struct DEPTHTARGET
{
	// Starts out with no renderbuffer (id 0) and frame 0.
	DEPTHTARGET()
		: targ(0)
		, framecount(0)
	{
	}

	GLuint targ;    // GL renderbuffer id of the depth target
	int framecount; // frame counter value recorded when the target was last used
};
private:
typedef std::map<u32, TCacheEntry> TexCache;
static u8 *temp;
static TexCache textures;
static std::map<u32, DEPTHTARGET> mapDepthTargets;
public:
static void Init();

View File

@ -257,29 +257,6 @@ void Flush()
FRAGMENTSHADER* ps = PixelShaderCache::GetShader(false);
VERTEXSHADER* vs = VertexShaderCache::GetShader(g_nativeVertexFmt->m_components);
bool bRestoreBuffers = false;
if (Renderer::UseFakeZTarget())
{
if (bpmem.zmode.updateenable)
{
if (!bpmem.blendmode.colorupdate)
{
Renderer::SetRenderMode(bpmem.blendmode.alphaupdate ?
Renderer::RM_ZBufferAlpha :
Renderer::RM_ZBufferOnly);
}
}
else
{
Renderer::SetRenderMode(Renderer::RM_Normal);
// remove temporarily
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
bRestoreBuffers = true;
}
}
else
Renderer::SetRenderMode(Renderer::RM_Normal);
// set global constants
VertexShaderManager::SetConstants(g_Config.bProjHack1,g_Config.bPhackvalue1, g_Config.fhackvalue1, g_Config.bPhackvalue2, g_Config.fhackvalue2, g_Config.bFreeLook);
PixelShaderManager::SetConstants();
@ -330,8 +307,7 @@ void Flush()
}
// restore color mask
if (!bRestoreBuffers)
Renderer::SetColorMask();
Renderer::SetColorMask();
if (bpmem.blendmode.blendenable || bpmem.blendmode.subtract)
glEnable(GL_BLEND);
@ -361,13 +337,6 @@ void Flush()
GL_REPORT_ERRORD();
if (bRestoreBuffers)
{
GLenum s_drawbuffers[2] = {GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT};
glDrawBuffers(2, s_drawbuffers);
Renderer::SetColorMask();
}
ResetBuffer();
}

View File

@ -71,8 +71,7 @@ VERTEXSHADER* VertexShaderCache::GetShader(u32 components)
{
DVSTARTPROFILE();
VERTEXSHADERUID uid;
u32 zbufrender = (bpmem.ztex2.op == ZTEXTURE_ADD) || Renderer::UseFakeZTarget();
GetVertexShaderId(uid, components, zbufrender);
GetVertexShaderId(uid, components);
VSCache::iterator iter = vshaders.find(uid);
@ -86,7 +85,7 @@ VERTEXSHADER* VertexShaderCache::GetShader(u32 components)
}
VSCacheEntry& entry = vshaders[uid];
const char *code = GenerateVertexShader(components, Renderer::UseFakeZTarget());
const char *code = GenerateVertexShader(components);
#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_Config.iLog & CONF_SAVESHADERS && code) {