From fdad95c055ad5274e3a0b053558fd76b1766243b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 20 Mar 2014 20:34:51 +0100 Subject: [PATCH 1/5] PixelShaderGen: Cleanups and fixes for tev combiners. Fixes issue 4674. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 101 +++++++++++---------- 1 file changed, 53 insertions(+), 48 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 29399a253f..961ca9f3b1 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -90,27 +90,6 @@ static const char *tevKSelTableA[] = I_KCOLORS"[3].a", // K3_A = 0x1F }; -static const char *tevScaleTable[] = -{ - "", // SCALE_1 - " << 1", // SCALE_2 - " << 2", // SCALE_4 - " >> 1", // DIVIDE_2 -}; - -static const char *tevBiasTable[] = -{ - "", // ZERO, - "+ 128", // ADDHALF, - "- 128", // SUBHALF, - "", -}; - -static const char *tevOpTable[] = { - "+", // TEVOP_ADD = 0, - "-", // TEVOP_SUB = 1, -}; - static const char *tevCInputTable[] = { "prev.rgb", // CPREV, @@ -343,7 +322,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T "\tint3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" "\tint alphabump=0;\n" "\tint3 tevcoord=int3(0, 0, 0);\n" - "\tint2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n\n"); + "\tint2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n" + "\tint4 tevin_a=int4(0,0,0,0),tevin_b=int4(0,0,0,0),tevin_c=int4(0,0,0,0),tevin_d=int4(0,0,0,0);\n\n"); // tev combiner inputs if (ApiType == API_OPENGL) { @@ -778,6 +758,33 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP if (ac.dest >= GX_TEVREG0 && ac.dest <= GX_TEVREG2) out.SetConstantsUsed(C_COLORS+ac.dest, C_COLORS+ac.dest); + + const char *tevScaleTable[] = + { + "", // SCALE_1 + " << 1", // SCALE_2 + " << 2", // SCALE_4 + " >> 1", // DIVIDE_2 + }; + + const char *tevBiasTable[] = + { + "", // ZERO, + "+ 128", // ADDHALF, + "- 
128", // SUBHALF, + "", + }; + + const char *tevOpTable[] = { + "+", // TEVOP_ADD = 0, + "-", // TEVOP_SUB = 1, + }; + + out.Write("tevin_a = int4(%s, %s.a)&255;\n", tevCInputTable[cc.a], tevAInputTable[ac.a]); + out.Write("tevin_b = int4(%s, %s.a)&255;\n", tevCInputTable[cc.b], tevAInputTable[ac.b]); + out.Write("tevin_c = int4(%s, %s.a)&255;\n", tevCInputTable[cc.c], tevAInputTable[ac.c]); + out.Write("tevin_d = int4(%s, %s.a);\n", tevCInputTable[cc.d], tevAInputTable[ac.d]); + out.Write("\t// color combine\n"); out.Write("\t%s = clamp(", tevCOutputTable[cc.dest]); @@ -789,9 +796,9 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.Write("("); if (!(cc.d == TEVCOLORARG_ZERO && cc.op == TEVOP_ADD)) - out.Write("%s %s ", tevCInputTable[cc.d], tevOpTable[cc.op]); + out.Write("tevin_d.rgb %s ", tevOpTable[cc.op]); - out.Write("((%s&255) * (int3(255,255,255) - (%s&255)) + (%s&255) * (%s&255)) / 255", tevCInputTable[cc.a], tevCInputTable[cc.c], tevCInputTable[cc.b], tevCInputTable[cc.c]); + out.Write("(tevin_a.rgb * (int3(255,255,255) - tevin_c.rgb) + tevin_b.rgb * tevin_c.rgb) / 255"); out.Write(" %s", tevBiasTable[cc.bias]); @@ -802,20 +809,19 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP { const char *function_table[] = { - "(((%s.r&255) > %s.r) ? (%s&255): int3(0,0,0))", // TEVCMP_R8_GT - "(((%s.r&255) == %s.r) ? (%s&255): int3(0,0,0))", // TEVCMP_R8_EQ - "((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))", // TEVCMP_GR16_GT - "((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))", // TEVCMP_GR16_EQ - "((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s&255): int3(0,0,0))", // TEVCMP_BGR24_GT - "((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? 
(%s&255): int3(0,0,0))", // TEVCMP_BGR24_EQ - "int3(max(sign(int3((%s.rgb&255)) - int3((%s.rgb&255))), int3(0,0,0)) * (%s&255))", // TEVCMP_RGB8_GT - "int3((int3(255,255,255) - max(sign(abs(int3((%s.rgb&255)) - int3((%s.rgb&255)))), int3(0,0,0))) * (%s&255))" // TEVCMP_RGB8_EQ + "((tevin_a.r > tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_R8_GT + "((tevin_a.r == tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_R8_EQ + "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_GR16_GT + "((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_GR16_EQ + "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_BGR24_GT + "((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_BGR24_EQ + "(max(sign(tevin_a.rgb - tevin_b.rgb), int3(0,0,0)) * tevin_c.rgb)", // TEVCMP_RGB8_GT + "((int3(255,255,255) - max(sign(abs(tevin_a.rgb - tevin_b.rgb))), int3(0,0,0))) * tevin_c.rgb)" // TEVCMP_RGB8_EQ }; int mode = (cc.shift<<1)|cc.op; - out.Write(" %s + ", tevCInputTable[cc.d]); - out.Write(function_table[mode], tevCInputTable[cc.a], - tevCInputTable[cc.b], tevCInputTable[cc.c]); + out.Write(" tevin_d.rgb + "); + out.Write(function_table[mode]); } if (cc.clamp) out.Write(", int3(0,0,0), int3(255,255,255))"); @@ -833,9 +839,9 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.Write("("); if (!(ac.d == TEVALPHAARG_ZERO && ac.op == TEVOP_ADD)) - out.Write("%s.a %s ", tevAInputTable[ac.d], tevOpTable[ac.op]); + out.Write("tevin_d.a %s ", tevOpTable[ac.op]); - out.Write("((%s.a&255) * (255 - (%s.a&255)) + (%s.a&255) * (%s.a&255)) / 255", tevAInputTable[ac.a], tevAInputTable[ac.c], tevAInputTable[ac.b], tevAInputTable[ac.c]); + out.Write("(tevin_a.a * (255 - tevin_c.a) + tevin_b.a * tevin_c.a) / 255"); out.Write(" %s",tevBiasTable[ac.bias]); @@ -846,20 +852,19 @@ static inline 
void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP { const char *function_table[] = { - "(((%s.r&255) > (%s.r&255)) ? (%s.a&255) : 0)", // TEVCMP_R8_GT - "(((%s.r&255) == (%s.r&255)) ? (%s.a&255) : 0)", // TEVCMP_R8_EQ - "((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)", // TEVCMP_GR16_GT - "((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)", // TEVCMP_GR16_EQ - "((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)", // TEVCMP_BGR24_GT - "((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)", // TEVCMP_BGR24_EQ - "(((%s.a&255) > (%s.a&255)) ? (%s.a&255) : 0)", // TEVCMP_A8_GT - "(((%s.a&255) == (%s.a&255)) ? (%s.a&255) : 0)" // TEVCMP_A8_EQ + "((tevin_a.r > tevin_b.r) ? tevin_c.a : 0)", // TEVCMP_R8_GT + "((tevin_a.r == tevin_b.r) ? tevin_c.a : 0)", // TEVCMP_R8_EQ + "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // TEVCMP_GR16_GT + "((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // TEVCMP_GR16_EQ + "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // TEVCMP_BGR24_GT + "((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // TEVCMP_BGR24_EQ + "((tevin_a.a > tevin_b.a) ? tevin_c.a : 0)", // TEVCMP_A8_GT + "((tevin_a.a == tevin_b.a) ? tevin_c.a : 0)" // TEVCMP_A8_EQ }; int mode = (ac.shift<<1)|ac.op; - out.Write(" %s.a + ", tevAInputTable[ac.d]); - out.Write(function_table[mode], tevAInputTable[ac.a], - tevAInputTable[ac.b], tevAInputTable[ac.c]); + out.Write(" tevin_d.a + "); + out.Write(function_table[mode]); } if (ac.clamp) out.Write(", 0, 255)"); From 4f82d6f7aff107d3cb941df111250c66aa9f5ea8 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 20 Mar 2014 20:49:09 +0100 Subject: [PATCH 2/5] PixelShaderGen: Implement tev combiner lerping in a faster way which also reproduces hardware behavior perfectly. 
The new behavior has been verified to be correct by hardware tests. This is an improvement over the old code, which was just a guess. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 52 +++++++++------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 961ca9f3b1..ff8b9c2f1c 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -759,19 +759,35 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.SetConstantsUsed(C_COLORS+ac.dest, C_COLORS+ac.dest); - const char *tevScaleTable[] = + const char *tevScaleTableLeft[] = { "", // SCALE_1 " << 1", // SCALE_2 " << 2", // SCALE_4 + "", // DIVIDE_2 + }; + + const char *tevScaleTableRight[] = + { + "", // SCALE_1 + "", // SCALE_2 + "", // SCALE_4 " >> 1", // DIVIDE_2 }; + const char *tevLerpBias[] = // indexed by 2*op+(shift==3) + { + "", + " + 128", + "", + " + 127", + }; + const char *tevBiasTable[] = { - "", // ZERO, - "+ 128", // ADDHALF, - "- 128", // SUBHALF, + "", // ZERO, + " + 128", // ADDHALF, + " - 128", // SUBHALF, "", }; @@ -791,19 +807,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP // combine the color channel if (cc.bias != TevBias_COMPARE) // if not compare { - //normal color combiner goes here - if (cc.shift > TEVSCALE_1) - out.Write("("); - - if (!(cc.d == TEVCOLORARG_ZERO && cc.op == TEVOP_ADD)) - out.Write("tevin_d.rgb %s ", tevOpTable[cc.op]); - - out.Write("(tevin_a.rgb * (int3(255,255,255) - tevin_c.rgb) + tevin_b.rgb * tevin_c.rgb) / 255"); - - out.Write(" %s", tevBiasTable[cc.bias]); - - if (cc.shift > TEVSCALE_1) - out.Write(")%s", tevScaleTable[cc.shift]); + out.Write("(((tevin_d.rgb%s)%s) %s ((((tevin_a.rgb*256 + (tevin_b.rgb-tevin_a.rgb)*(tevin_c.rgb+(tevin_c.rgb>>7)))%s)%s)>>8))%s", tevBiasTable[cc.bias], tevScaleTableLeft[cc.shift], tevOpTable[cc.op], 
tevScaleTableLeft[cc.shift], tevLerpBias[2*cc.op+(cc.shift==3)], tevScaleTableRight[cc.shift]); } else { @@ -834,19 +838,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP if (ac.bias != TevBias_COMPARE) // if not compare { - //normal alpha combiner goes here - if (ac.shift > 0) - out.Write("("); - - if (!(ac.d == TEVALPHAARG_ZERO && ac.op == TEVOP_ADD)) - out.Write("tevin_d.a %s ", tevOpTable[ac.op]); - - out.Write("(tevin_a.a * (255 - tevin_c.a) + tevin_b.a * tevin_c.a) / 255"); - - out.Write(" %s",tevBiasTable[ac.bias]); - - if (ac.shift>0) - out.Write(")%s", tevScaleTable[ac.shift]); + out.Write("(((tevin_d.a%s)%s) %s ((((tevin_a.a*256 + (tevin_b.a-tevin_a.a)*(tevin_c.a+(tevin_c.a>>7)))%s)%s)>>8))%s", tevBiasTable[ac.bias], tevScaleTableLeft[ac.shift], tevOpTable[ac.op], tevScaleTableLeft[ac.shift], tevLerpBias[2*ac.op+(ac.shift==3)], tevScaleTableRight[ac.shift]); } else { From eb0f547a17c74bf03827c48c7b8fce6b7a303174 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 24 Mar 2014 14:41:56 +0100 Subject: [PATCH 3/5] PixelShaderGen: Cleanups. 
--- Source/Core/VideoCommon/PixelShaderGen.cpp | 166 +++++++++++---------- 1 file changed, 90 insertions(+), 76 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index ff8b9c2f1c..85d25b2cac 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -112,14 +112,14 @@ static const char *tevCInputTable[] = static const char *tevAInputTable[] = { - "prev", // APREV, - "c0", // A0, - "c1", // A1, - "c2", // A2, - "textemp", // TEXA, - "rastemp", // RASA, - "konsttemp", // KONST, (hw1 had quarter) - "int4(0,0,0,0)", // ZERO + "prev.a", // APREV, + "c0.a", // A0, + "c1.a", // A1, + "c2.a", // A2, + "textemp.a", // TEXA, + "rastemp.a", // RASA, + "konsttemp.a", // KONST, (hw1 had quarter) + "0", // ZERO }; static const char *tevRasTable[] = @@ -140,6 +140,7 @@ static const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" }; static char text[16384]; template static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, const char swapModeTable[4][5]); +template static inline void WriteTevRegular(T& out, const char* components, int bias, int op, int clamp, int shift); template static inline void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); template static inline void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); template static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data); @@ -759,6 +760,76 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.SetConstantsUsed(C_COLORS+ac.dest, C_COLORS+ac.dest); + out.Write("tevin_a = int4(%s, %s)&255;\n", tevCInputTable[cc.a], tevAInputTable[ac.a]); + out.Write("tevin_b = int4(%s, %s)&255;\n", tevCInputTable[cc.b], tevAInputTable[ac.b]); + out.Write("tevin_c = int4(%s, %s)&255;\n", tevCInputTable[cc.c], 
tevAInputTable[ac.c]); + out.Write("tevin_d = int4(%s, %s);\n", tevCInputTable[cc.d], tevAInputTable[ac.d]); + + out.Write("\t// color combine\n"); + out.Write("\t%s = clamp(", tevCOutputTable[cc.dest]); + if (cc.bias != TevBias_COMPARE) + { + WriteTevRegular(out, "rgb", cc.bias, cc.op, cc.clamp, cc.shift); + } + else + { + const char *function_table[] = + { + "((tevin_a.r > tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_R8_GT + "((tevin_a.r == tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_R8_EQ + "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_GR16_GT + "((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_GR16_EQ + "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_BGR24_GT + "((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_BGR24_EQ + "(max(sign(tevin_a.rgb - tevin_b.rgb), int3(0,0,0)) * tevin_c.rgb)", // TEVCMP_RGB8_GT + "((int3(1,1,1) - sign(abs(tevin_a.rgb - tevin_b.rgb))) * tevin_c.rgb)" // TEVCMP_RGB8_EQ + }; + + int mode = (cc.shift<<1)|cc.op; + out.Write(" tevin_d.rgb + "); + out.Write(function_table[mode]); + } + if (cc.clamp) + out.Write(", int3(0,0,0), int3(255,255,255))"); + else + out.Write(", int3(-1024,-1024,-1024), int3(1023,1023,1023))"); + out.Write(";\n"); + + out.Write("\t// alpha combine\n"); + out.Write("\t%s = clamp(", tevAOutputTable[ac.dest]); + if (ac.bias != TevBias_COMPARE) + { + WriteTevRegular(out, "a", ac.bias, ac.op, ac.clamp, ac.shift); + } + else + { + const char *function_table[] = + { + "((tevin_a.r > tevin_b.r) ? tevin_c.a : 0)", // TEVCMP_R8_GT + "((tevin_a.r == tevin_b.r) ? tevin_c.a : 0)", // TEVCMP_R8_EQ + "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // TEVCMP_GR16_GT + "((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? 
tevin_c.a : 0)", // TEVCMP_GR16_EQ + "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // TEVCMP_BGR24_GT + "((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // TEVCMP_BGR24_EQ + "((tevin_a.a > tevin_b.a) ? tevin_c.a : 0)", // TEVCMP_A8_GT + "((tevin_a.a == tevin_b.a) ? tevin_c.a : 0)" // TEVCMP_A8_EQ + }; + + int mode = (ac.shift<<1)|ac.op; + out.Write(" tevin_d.a + "); + out.Write(function_table[mode]); + } + if (ac.clamp) + out.Write(", 0, 255)"); + else + out.Write(", -1024, 1023)"); + + out.Write(";\n\n"); +} + +template +static inline void WriteTevRegular(T& out, const char* components, int bias, int op, int clamp, int shift) +{ const char *tevScaleTableLeft[] = { "", // SCALE_1 @@ -796,74 +867,17 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP "-", // TEVOP_SUB = 1, }; - out.Write("tevin_a = int4(%s, %s.a)&255;\n", tevCInputTable[cc.a], tevAInputTable[ac.a]); - out.Write("tevin_b = int4(%s, %s.a)&255;\n", tevCInputTable[cc.b], tevAInputTable[ac.b]); - out.Write("tevin_c = int4(%s, %s.a)&255;\n", tevCInputTable[cc.c], tevAInputTable[ac.c]); - out.Write("tevin_d = int4(%s, %s.a);\n", tevCInputTable[cc.d], tevAInputTable[ac.d]); - - out.Write("\t// color combine\n"); - out.Write("\t%s = clamp(", tevCOutputTable[cc.dest]); - - // combine the color channel - if (cc.bias != TevBias_COMPARE) // if not compare - { - out.Write("(((tevin_d.rgb%s)%s) %s ((((tevin_a.rgb*256 + (tevin_b.rgb-tevin_a.rgb)*(tevin_c.rgb+(tevin_c.rgb>>7)))%s)%s)>>8))%s", tevBiasTable[cc.bias], tevScaleTableLeft[cc.shift], tevOpTable[cc.op], tevScaleTableLeft[cc.shift], tevLerpBias[2*cc.op+(cc.shift==3)], tevScaleTableRight[cc.shift]); - } - else - { - const char *function_table[] = - { - "((tevin_a.r > tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_R8_GT - "((tevin_a.r == tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_R8_EQ - "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? 
tevin_c.rgb : int3(0,0,0))", // TEVCMP_GR16_GT - "((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_GR16_EQ - "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_BGR24_GT - "((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_BGR24_EQ - "(max(sign(tevin_a.rgb - tevin_b.rgb), int3(0,0,0)) * tevin_c.rgb)", // TEVCMP_RGB8_GT - "((int3(255,255,255) - max(sign(abs(tevin_a.rgb - tevin_b.rgb))), int3(0,0,0))) * tevin_c.rgb)" // TEVCMP_RGB8_EQ - }; - - int mode = (cc.shift<<1)|cc.op; - out.Write(" tevin_d.rgb + "); - out.Write(function_table[mode]); - } - if (cc.clamp) - out.Write(", int3(0,0,0), int3(255,255,255))"); - else - out.Write(", int3(-1024,-1024,-1024), int3(1023,1023,1023))"); - out.Write(";\n"); - - out.Write("\t// alpha combine\n"); - out.Write("\t%s = clamp(", tevAOutputTable[ac.dest]); - - if (ac.bias != TevBias_COMPARE) // if not compare - { - out.Write("(((tevin_d.a%s)%s) %s ((((tevin_a.a*256 + (tevin_b.a-tevin_a.a)*(tevin_c.a+(tevin_c.a>>7)))%s)%s)>>8))%s", tevBiasTable[ac.bias], tevScaleTableLeft[ac.shift], tevOpTable[ac.op], tevScaleTableLeft[ac.shift], tevLerpBias[2*ac.op+(ac.shift==3)], tevScaleTableRight[ac.shift]); - } - else - { - const char *function_table[] = - { - "((tevin_a.r > tevin_b.r) ? tevin_c.a : 0)", // TEVCMP_R8_GT - "((tevin_a.r == tevin_b.r) ? tevin_c.a : 0)", // TEVCMP_R8_EQ - "((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // TEVCMP_GR16_GT - "((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // TEVCMP_GR16_EQ - "((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // TEVCMP_BGR24_GT - "((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // TEVCMP_BGR24_EQ - "((tevin_a.a > tevin_b.a) ? tevin_c.a : 0)", // TEVCMP_A8_GT - "((tevin_a.a == tevin_b.a) ? 
tevin_c.a : 0)" // TEVCMP_A8_EQ - }; - - int mode = (ac.shift<<1)|ac.op; - out.Write(" tevin_d.a + "); - out.Write(function_table[mode]); - } - if (ac.clamp) - out.Write(", 0, 255)"); - else - out.Write(", -1024, 1023)"); - - out.Write(";\n\n"); + // Regular TEV stage: (d + bias + lerp(a,b,c)) * scale + // The GC/Wii GPU uses a very sophisticated algorithm for scale-lerping: + // - c is scaled from 0..255 to 0..256, which allows dividing the result by 256 instead of 255 + // - if scale is bigger than one, it is moved inside the lerp calculation for increased accuracy + // - a rounding bias is added before dividing by 256 + out.Write("(((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]); + out.Write(" %s ", tevOpTable[op]); + out.Write("((((tevin_a.%s*256 + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+(tevin_c.%s>>7)))%s)%s)>>8)", + components, components, components, components, components, + tevScaleTableLeft[shift], tevLerpBias[2*op+(shift==3)]); + out.Write(")%s", tevScaleTableRight[shift]); } template From a8c8db8da7487b4bc3bcfb549c296a811c691042 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 24 Mar 2014 14:42:04 +0100 Subject: [PATCH 4/5] Software renderer: Use color combiner configuration for alpha combiner compare mode inputs. As pointed out by dolphin-emu/hwtests@f684f2498. 
--- Source/Core/VideoBackends/Software/Tev.cpp | 270 ++++++++------------- Source/Core/VideoBackends/Software/Tev.h | 8 +- 2 files changed, 100 insertions(+), 178 deletions(-) diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp index 1890aaa3e3..3c294ef5f6 100644 --- a/Source/Core/VideoBackends/Software/Tev.cpp +++ b/Source/Core/VideoBackends/Software/Tev.cpp @@ -55,14 +55,14 @@ void Tev::Init() m_ColorInputLUT[14][RED_INP] = &StageKonst[RED_C]; m_ColorInputLUT[14][GRN_INP] = &StageKonst[GRN_C]; m_ColorInputLUT[14][BLU_INP] = &StageKonst[BLU_C]; // konst m_ColorInputLUT[15][RED_INP] = &FixedConstants[0]; m_ColorInputLUT[15][GRN_INP] = &FixedConstants[0]; m_ColorInputLUT[15][BLU_INP] = &FixedConstants[0]; // zero - m_AlphaInputLUT[0] = Reg[0]; // prev - m_AlphaInputLUT[1] = Reg[1]; // c0 - m_AlphaInputLUT[2] = Reg[2]; // c1 - m_AlphaInputLUT[3] = Reg[3]; // c2 - m_AlphaInputLUT[4] = TexColor; // tex - m_AlphaInputLUT[5] = RasColor; // ras - m_AlphaInputLUT[6] = StageKonst; // konst - m_AlphaInputLUT[7] = Zero16; // zero + m_AlphaInputLUT[0] = &Reg[0][ALP_C]; // prev + m_AlphaInputLUT[1] = &Reg[1][ALP_C]; // c0 + m_AlphaInputLUT[2] = &Reg[2][ALP_C]; // c1 + m_AlphaInputLUT[3] = &Reg[3][ALP_C]; // c2 + m_AlphaInputLUT[4] = &TexColor[ALP_C]; // tex + m_AlphaInputLUT[5] = &RasColor[ALP_C]; // ras + m_AlphaInputLUT[6] = &StageKonst[ALP_C]; // konst + m_AlphaInputLUT[7] = &Zero16[ALP_C]; // zero for (int comp = 0; comp < 4; comp++) { @@ -176,16 +176,11 @@ void Tev::SetRasColor(int colorChan, int swaptable) } } -void Tev::DrawColorRegular(TevStageCombiner::ColorCombiner &cc) +void Tev::DrawColorRegular(TevStageCombiner::ColorCombiner &cc, const InputRegType inputs[4]) { - InputRegType InputReg; - for (int i = 0; i < 3; i++) { - InputReg.a = *m_ColorInputLUT[cc.a][i]; - InputReg.b = *m_ColorInputLUT[cc.b][i]; - InputReg.c = *m_ColorInputLUT[cc.c][i]; - InputReg.d = *m_ColorInputLUT[cc.d][i]; + const InputRegType& InputReg = 
inputs[BLU_C + i]; u16 c = InputReg.c + (InputReg.c >> 7); @@ -200,120 +195,66 @@ void Tev::DrawColorRegular(TevStageCombiner::ColorCombiner &cc) } } -void Tev::DrawColorCompare(TevStageCombiner::ColorCombiner &cc) +void Tev::DrawColorCompare(TevStageCombiner::ColorCombiner &cc, const InputRegType inputs[4]) { - int cmp = (cc.shift<<1)|cc.op|8; // comparemode stored here + for (int i = BLU_C; i <= RED_C; i++) + { + switch ((cc.shift<<1)|cc.op|8) // encoded compare mode + { + case TEVCMP_R8_GT: + Reg[cc.dest][i] = inputs[i].d + ((inputs[RED_C].a > inputs[RED_C].b) ? inputs[i].c : 0); + break; - u32 a; - u32 b; + case TEVCMP_R8_EQ: + Reg[cc.dest][i] = inputs[i].d + ((inputs[RED_C].a == inputs[RED_C].b) ? inputs[i].c : 0); + break; - InputRegType InputReg; + case TEVCMP_GR16_GT: + { + u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a; + u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b; + Reg[cc.dest][i] = inputs[i].d + ((a > b) ? inputs[i].c : 0); + } + break; - switch (cmp) { - case TEVCMP_R8_GT: - { - a = *m_ColorInputLUT[cc.a][RED_INP] & 0xff; - b = *m_ColorInputLUT[cc.b][RED_INP] & 0xff; - for (int i = 0; i < 3; i++) + case TEVCMP_GR16_EQ: { - InputReg.c = *m_ColorInputLUT[cc.c][i]; - InputReg.d = *m_ColorInputLUT[cc.d][i]; - Reg[cc.dest][BLU_C + i] = InputReg.d + ((a > b) ? InputReg.c : 0); + u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a; + u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b; + Reg[cc.dest][i] = inputs[i].d + ((a == b) ? inputs[i].c : 0); } - } - break; + break; - case TEVCMP_R8_EQ: - { - a = *m_ColorInputLUT[cc.a][RED_INP] & 0xff; - b = *m_ColorInputLUT[cc.b][RED_INP] & 0xff; - for (int i = 0; i < 3; i++) + case TEVCMP_BGR24_GT: { - InputReg.c = *m_ColorInputLUT[cc.c][i]; - InputReg.d = *m_ColorInputLUT[cc.d][i]; - Reg[cc.dest][BLU_C + i] = InputReg.d + ((a == b) ? 
InputReg.c : 0); + u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a; + u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b; + Reg[cc.dest][i] = inputs[i].d + ((a > b) ? inputs[i].c : 0); } - } - break; - case TEVCMP_GR16_GT: - { - a = ((*m_ColorInputLUT[cc.a][GRN_INP] & 0xff) << 8) | (*m_ColorInputLUT[cc.a][RED_INP] & 0xff); - b = ((*m_ColorInputLUT[cc.b][GRN_INP] & 0xff) << 8) | (*m_ColorInputLUT[cc.b][RED_INP] & 0xff); - for (int i = 0; i < 3; i++) + break; + + case TEVCMP_BGR24_EQ: { - InputReg.c = *m_ColorInputLUT[cc.c][i]; - InputReg.d = *m_ColorInputLUT[cc.d][i]; - Reg[cc.dest][BLU_C + i] = InputReg.d + ((a > b) ? InputReg.c : 0); + u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a; + u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b; + Reg[cc.dest][i] = inputs[i].d + ((a == b) ? inputs[i].c : 0); } + break; + + case TEVCMP_RGB8_GT: + Reg[cc.dest][i] = inputs[i].d + ((inputs[i].a > inputs[i].b) ? inputs[i].c : 0); + break; + + case TEVCMP_RGB8_EQ: + Reg[cc.dest][i] = inputs[i].d + ((inputs[i].a == inputs[i].b) ? inputs[i].c : 0); + break; } - break; - case TEVCMP_GR16_EQ: - { - a = ((*m_ColorInputLUT[cc.a][GRN_C] & 0xff) << 8) | (*m_ColorInputLUT[cc.a][RED_INP] & 0xff); - b = ((*m_ColorInputLUT[cc.b][GRN_C] & 0xff) << 8) | (*m_ColorInputLUT[cc.b][RED_INP] & 0xff); - for (int i = 0; i < 3; i++) - { - InputReg.c = *m_ColorInputLUT[cc.c][i]; - InputReg.d = *m_ColorInputLUT[cc.d][i]; - Reg[cc.dest][BLU_C + i] = InputReg.d + ((a == b) ? 
InputReg.c : 0); - } - } - break; - case TEVCMP_BGR24_GT: - { - a = ((*m_ColorInputLUT[cc.a][BLU_C] & 0xff) << 16) | ((*m_ColorInputLUT[cc.a][GRN_C] & 0xff) << 8) | (*m_ColorInputLUT[cc.a][RED_INP] & 0xff); - b = ((*m_ColorInputLUT[cc.b][BLU_C] & 0xff) << 16) | ((*m_ColorInputLUT[cc.b][GRN_C] & 0xff) << 8) | (*m_ColorInputLUT[cc.b][RED_INP] & 0xff); - for (int i = 0; i < 3; i++) - { - InputReg.c = *m_ColorInputLUT[cc.c][i]; - InputReg.d = *m_ColorInputLUT[cc.d][i]; - Reg[cc.dest][BLU_C + i] = InputReg.d + ((a > b) ? InputReg.c : 0); - } - } - break; - case TEVCMP_BGR24_EQ: - { - a = ((*m_ColorInputLUT[cc.a][BLU_C] & 0xff) << 16) | ((*m_ColorInputLUT[cc.a][GRN_C] & 0xff) << 8) | (*m_ColorInputLUT[cc.a][RED_INP] & 0xff); - b = ((*m_ColorInputLUT[cc.b][BLU_C] & 0xff) << 16) | ((*m_ColorInputLUT[cc.b][GRN_C] & 0xff) << 8) | (*m_ColorInputLUT[cc.b][RED_INP] & 0xff); - for (int i = 0; i < 3; i++) - { - InputReg.c = *m_ColorInputLUT[cc.c][i]; - InputReg.d = *m_ColorInputLUT[cc.d][i]; - Reg[cc.dest][BLU_C + i] = InputReg.d + ((a == b) ? InputReg.c : 0); - } - } - break; - case TEVCMP_RGB8_GT: - for (int i = 0; i < 3; i++) - { - InputReg.a = *m_ColorInputLUT[cc.a][i]; - InputReg.b = *m_ColorInputLUT[cc.b][i]; - InputReg.c = *m_ColorInputLUT[cc.c][i]; - InputReg.d = *m_ColorInputLUT[cc.d][i]; - Reg[cc.dest][BLU_C + i] = InputReg.d + ((InputReg.a > InputReg.b) ? InputReg.c : 0); - } - break; - case TEVCMP_RGB8_EQ: - for (int i = 0; i < 3; i++) - { - InputReg.a = *m_ColorInputLUT[cc.a][i]; - InputReg.b = *m_ColorInputLUT[cc.b][i]; - InputReg.c = *m_ColorInputLUT[cc.c][i]; - InputReg.d = *m_ColorInputLUT[cc.d][i]; - Reg[cc.dest][BLU_C + i] = InputReg.d + ((InputReg.a == InputReg.b) ? 
InputReg.c : 0); - } - break; } } -void Tev::DrawAlphaRegular(TevStageCombiner::AlphaCombiner &ac) +void Tev::DrawAlphaRegular(TevStageCombiner::AlphaCombiner &ac, const InputRegType inputs[4]) { - InputRegType InputReg; - - InputReg.a = m_AlphaInputLUT[ac.a][ALP_C]; - InputReg.b = m_AlphaInputLUT[ac.b][ALP_C]; - InputReg.c = m_AlphaInputLUT[ac.c][ALP_C]; - InputReg.d = m_AlphaInputLUT[ac.d][ALP_C]; + const InputRegType& InputReg = inputs[ALP_C]; u16 c = InputReg.c + (InputReg.c >> 7); @@ -327,88 +268,56 @@ void Tev::DrawAlphaRegular(TevStageCombiner::AlphaCombiner &ac) Reg[ac.dest][ALP_C] = result; } -void Tev::DrawAlphaCompare(TevStageCombiner::AlphaCombiner &ac) +void Tev::DrawAlphaCompare(TevStageCombiner::AlphaCombiner& ac, const InputRegType inputs[4]) { - int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here - - u32 a; - u32 b; - - InputRegType InputReg; - - switch (cmp) { + switch ((ac.shift<<1)|ac.op|8) // encoded compare mode + { case TEVCMP_R8_GT: - { - a = m_AlphaInputLUT[ac.a][RED_C] & 0xff; - b = m_AlphaInputLUT[ac.b][RED_C] & 0xff; - InputReg.c = m_AlphaInputLUT[ac.c][ALP_C]; - InputReg.d = m_AlphaInputLUT[ac.d][ALP_C]; - Reg[ac.dest][ALP_C] = InputReg.d + ((a > b) ? InputReg.c : 0); - } + Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((inputs[RED_C].a > inputs[RED_C].b) ? inputs[ALP_C].c : 0); break; case TEVCMP_R8_EQ: - { - a = m_AlphaInputLUT[ac.a][RED_C] & 0xff; - b = m_AlphaInputLUT[ac.b][RED_C] & 0xff; - InputReg.c = m_AlphaInputLUT[ac.c][ALP_C]; - InputReg.d = m_AlphaInputLUT[ac.d][ALP_C]; - Reg[ac.dest][ALP_C] = InputReg.d + ((a == b) ? InputReg.c : 0); - } + Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((inputs[RED_C].a == inputs[RED_C].b) ? 
inputs[ALP_C].c : 0); break; + case TEVCMP_GR16_GT: { - a = ((m_AlphaInputLUT[ac.a][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.a][RED_C] & 0xff); - b = ((m_AlphaInputLUT[ac.b][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.b][RED_C] & 0xff); - InputReg.c = m_AlphaInputLUT[ac.c][ALP_C]; - InputReg.d = m_AlphaInputLUT[ac.d][ALP_C]; - Reg[ac.dest][ALP_C] = InputReg.d + ((a > b) ? InputReg.c : 0); + u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a; + u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b; + Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a > b) ? inputs[ALP_C].c : 0); } break; + case TEVCMP_GR16_EQ: { - a = ((m_AlphaInputLUT[ac.a][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.a][RED_C] & 0xff); - b = ((m_AlphaInputLUT[ac.b][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.b][RED_C] & 0xff); - InputReg.c = m_AlphaInputLUT[ac.c][ALP_C]; - InputReg.d = m_AlphaInputLUT[ac.d][ALP_C]; - Reg[ac.dest][ALP_C] = InputReg.d + ((a == b) ? InputReg.c : 0); + u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a; + u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b; + Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a == b) ? inputs[ALP_C].c : 0); } break; + case TEVCMP_BGR24_GT: { - a = ((m_AlphaInputLUT[ac.a][BLU_C] & 0xff) << 16) | ((m_AlphaInputLUT[ac.a][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.a][RED_C] & 0xff); - b = ((m_AlphaInputLUT[ac.b][BLU_C] & 0xff) << 16) | ((m_AlphaInputLUT[ac.b][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.b][RED_C] & 0xff); - InputReg.c = m_AlphaInputLUT[ac.c][ALP_C]; - InputReg.d = m_AlphaInputLUT[ac.d][ALP_C]; - Reg[ac.dest][ALP_C] = InputReg.d + ((a > b) ? InputReg.c : 0); + u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a; + u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b; + Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a > b) ? 
inputs[ALP_C].c : 0); } break; + case TEVCMP_BGR24_EQ: { - a = ((m_AlphaInputLUT[ac.a][BLU_C] & 0xff) << 16) | ((m_AlphaInputLUT[ac.a][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.a][RED_C] & 0xff); - b = ((m_AlphaInputLUT[ac.b][BLU_C] & 0xff) << 16) | ((m_AlphaInputLUT[ac.b][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.b][RED_C] & 0xff); - InputReg.c = m_AlphaInputLUT[ac.c][ALP_C]; - InputReg.d = m_AlphaInputLUT[ac.d][ALP_C]; - Reg[ac.dest][ALP_C] = InputReg.d + ((a == b) ? InputReg.c : 0); + u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a; + u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b; + Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a == b) ? inputs[ALP_C].c : 0); } break; + case TEVCMP_A8_GT: - { - InputReg.a = m_AlphaInputLUT[ac.a][ALP_C]; - InputReg.b = m_AlphaInputLUT[ac.b][ALP_C]; - InputReg.c = m_AlphaInputLUT[ac.c][ALP_C]; - InputReg.d = m_AlphaInputLUT[ac.d][ALP_C]; - Reg[ac.dest][ALP_C] = InputReg.d + ((InputReg.a > InputReg.b) ? InputReg.c : 0); - } + Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((inputs[ALP_C].a > inputs[ALP_C].b) ? inputs[ALP_C].c : 0); break; + case TEVCMP_A8_EQ: - { - InputReg.a = m_AlphaInputLUT[ac.a][ALP_C]; - InputReg.b = m_AlphaInputLUT[ac.b][ALP_C]; - InputReg.c = m_AlphaInputLUT[ac.c][ALP_C]; - InputReg.d = m_AlphaInputLUT[ac.d][ALP_C]; - Reg[ac.dest][ALP_C] = InputReg.d + ((InputReg.a == InputReg.b) ? InputReg.c : 0); - } + Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((inputs[ALP_C].a == inputs[ALP_C].b) ? 
inputs[ALP_C].c : 0); break; } } @@ -666,10 +575,23 @@ void Tev::Draw() SetRasColor(order.getColorChan(stageOdd), ac.rswap * 2); // combine inputs + InputRegType inputs[4]; + for (int i = 0; i < 3; i++) + { + inputs[BLU_C + i].a = *m_ColorInputLUT[cc.a][i]; + inputs[BLU_C + i].b = *m_ColorInputLUT[cc.b][i]; + inputs[BLU_C + i].c = *m_ColorInputLUT[cc.c][i]; + inputs[BLU_C + i].d = *m_ColorInputLUT[cc.d][i]; + } + inputs[ALP_C].a = *m_AlphaInputLUT[ac.a]; + inputs[ALP_C].b = *m_AlphaInputLUT[ac.b]; + inputs[ALP_C].c = *m_AlphaInputLUT[ac.c]; + inputs[ALP_C].d = *m_AlphaInputLUT[ac.d]; + if (cc.bias != 3) - DrawColorRegular(cc); + DrawColorRegular(cc, inputs); else - DrawColorCompare(cc); + DrawColorCompare(cc, inputs); if (cc.clamp) { @@ -685,9 +607,9 @@ void Tev::Draw() } if (ac.bias != 3) - DrawAlphaRegular(ac); + DrawAlphaRegular(ac, inputs); else - DrawAlphaCompare(ac); + DrawAlphaCompare(ac, inputs); if (ac.clamp) Reg[ac.dest][ALP_C] = Clamp255(Reg[ac.dest][ALP_C]); diff --git a/Source/Core/VideoBackends/Software/Tev.h b/Source/Core/VideoBackends/Software/Tev.h index 617dee842b..ecb5fde9f0 100644 --- a/Source/Core/VideoBackends/Software/Tev.h +++ b/Source/Core/VideoBackends/Software/Tev.h @@ -60,10 +60,10 @@ class Tev void SetRasColor(int colorChan, int swaptable); - void DrawColorRegular(TevStageCombiner::ColorCombiner &cc); - void DrawColorCompare(TevStageCombiner::ColorCombiner &cc); - void DrawAlphaRegular(TevStageCombiner::AlphaCombiner &ac); - void DrawAlphaCompare(TevStageCombiner::AlphaCombiner &ac); + void DrawColorRegular(TevStageCombiner::ColorCombiner& cc, const InputRegType inputs[4]); + void DrawColorCompare(TevStageCombiner::ColorCombiner& cc, const InputRegType inputs[4]); + void DrawAlphaRegular(TevStageCombiner::AlphaCombiner& ac, const InputRegType inputs[4]); + void DrawAlphaCompare(TevStageCombiner::AlphaCombiner& ac, const InputRegType inputs[4]); void Indirect(unsigned int stageNum, s32 s, s32 t); From 
1dead05cae47aeb9cb9e14a65a286e0f0de8dc95 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 24 Mar 2014 14:54:17 +0100 Subject: [PATCH 5/5] Software renderer: Properly calculate tev combiner output. As pointed out by dolphin-emu/hwtests@461476112. --- Source/Core/VideoBackends/Software/Tev.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp index 3c294ef5f6..8ee3715e03 100644 --- a/Source/Core/VideoBackends/Software/Tev.cpp +++ b/Source/Core/VideoBackends/Software/Tev.cpp @@ -185,10 +185,11 @@ void Tev::DrawColorRegular(TevStageCombiner::ColorCombiner &cc, const InputRegTy u16 c = InputReg.c + (InputReg.c >> 7); s32 temp = InputReg.a * (256 - c) + (InputReg.b * c); - temp = cc.op?(-temp >> 8):(temp >> 8); + temp <<= m_ScaleLShiftLUT[cc.shift]; + temp += (cc.shift != 3) ? 0 : (cc.op == 1) ? 127 : 128; + temp = cc.op ? (-temp >> 8) : (temp >> 8); - s32 result = InputReg.d + temp + m_BiasLUT[cc.bias]; - result = result << m_ScaleLShiftLUT[cc.shift]; + s32 result = ((InputReg.d + m_BiasLUT[cc.bias]) << m_ScaleLShiftLUT[cc.shift]) + temp; result = result >> m_ScaleRShiftLUT[cc.shift]; Reg[cc.dest][BLU_C + i] = result; @@ -259,10 +260,11 @@ void Tev::DrawAlphaRegular(TevStageCombiner::AlphaCombiner &ac, const InputRegTy u16 c = InputReg.c + (InputReg.c >> 7); s32 temp = InputReg.a * (256 - c) + (InputReg.b * c); - temp = ac.op?(-temp >> 8):(temp >> 8); + temp <<= m_ScaleLShiftLUT[ac.shift]; + temp += (ac.shift != 3) ? 0 : (ac.op == 1) ? 127 : 128; + temp = ac.op ? (-temp >> 8) : (temp >> 8); - s32 result = InputReg.d + temp + m_BiasLUT[ac.bias]; - result = result << m_ScaleLShiftLUT[ac.shift]; + s32 result = ((InputReg.d + m_BiasLUT[ac.bias]) << m_ScaleLShiftLUT[ac.shift]) + temp; result = result >> m_ScaleRShiftLUT[ac.shift]; Reg[ac.dest][ALP_C] = result;