buggy hw transformation

This commit is contained in:
Ced2911 2013-08-21 11:10:56 +02:00
parent cde47ac1b9
commit 882001a371
3 changed files with 150 additions and 88 deletions

View File

@ -41,6 +41,10 @@
#include "DisplayListInterpreter.h"
#include <map>
// Vertex declaration that MixedTransformAndDraw binds on the device before drawing.
// NOTE(review): presumably describes the MixedVertexFormat layout declared below - confirm where it is created.
IDirect3DVertexDeclaration9* pMixedVertexDecl = NULL;
#pragma pack(push, 1)
struct MixedVertexFormat {
@ -430,13 +434,13 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[
}
}
#if 0
struct GlTypeInfo {
u16 type;
u8 count;
u8 normalized;
};
#if 0
static const GlTypeInfo GLComp[] = {
{0}, // DEC_NONE,
{GL_FLOAT, 1, GL_FALSE}, // DEC_FLOAT_1,
@ -476,6 +480,95 @@ static void SetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decFmt
}
#endif
// One lookup entry: the D3DDECLTYPE_* value written into D3DVERTEXELEMENT9::Type.
struct DeclTypeInfo {
	u32 type;
};

// Maps a decoded vertex component format (DEC_* id, used as the array index by
// VertexAttribSetup) to the D3D declaration type used for it.
// Several DEC_* formats deliberately share one 4-component D3D type.
static const DeclTypeInfo VComp[] = {
	{ 0 },                    // DEC_NONE

	// Float components
	{ D3DDECLTYPE_FLOAT1 },   // DEC_FLOAT_1
	{ D3DDECLTYPE_FLOAT2 },   // DEC_FLOAT_2
	{ D3DDECLTYPE_FLOAT3 },   // DEC_FLOAT_3
	{ D3DDECLTYPE_FLOAT4 },   // DEC_FLOAT_4

	// Signed normalized components
	{ D3DDECLTYPE_BYTE4N },   // DEC_S8_3
	{ D3DDECLTYPE_SHORT4N },  // DEC_S16_3

	// Unsigned 8-bit components
	{ D3DDECLTYPE_UBYTE4N },  // DEC_U8_1
	{ D3DDECLTYPE_UBYTE4N },  // DEC_U8_2
	{ D3DDECLTYPE_UBYTE4N },  // DEC_U8_3
	{ D3DDECLTYPE_UBYTE4N },  // DEC_U8_4

	// Unsigned 16-bit components
	{ D3DDECLTYPE_USHORT4 },  // DEC_U16_1
	{ D3DDECLTYPE_USHORT4 },  // DEC_U16_2
	{ D3DDECLTYPE_USHORT4 },  // DEC_U16_3
	{ D3DDECLTYPE_USHORT4 },  // DEC_U16_4

	// "A" variants
	{ D3DDECLTYPE_BYTE4 },    // DEC_U8A_2
	{ D3DDECLTYPE_USHORT4 },  // DEC_U16A_2
};
/**
 * Fills in a single D3DVERTEXELEMENT9 entry.
 *
 * @param VertexElement  Entry to overwrite; it is fully zeroed first, so every
 *                       field not assigned here ends up as 0.
 * @param fmt            Decoded component format (DEC_* id); used as an index into VComp.
 * @param offset         Value stored in the element's Offset field.
 * @param usage          D3DDECLUSAGE_* value stored in the element's Usage field.
 * @param usage_index    Value stored in the element's UsageIndex field (defaults to 0).
 */
static void VertexAttribSetup(D3DVERTEXELEMENT9 * VertexElement, u8 fmt, u8 offset, u8 usage, u8 usage_index = 0) {
	D3DVERTEXELEMENT9 &element = *VertexElement;

	// Clear the whole entry before setting the fields we care about.
	memset(&element, 0, sizeof(element));

	element.Type = VComp[fmt].type;
	element.Offset = offset;
	element.Usage = usage;
	element.UsageIndex = usage_index;
}
// Vertex declaration for the decoded vertex layout. It is created by SetupDecFmtForDraw
// and bound with SetVertexDeclaration in the hardware-transform path of DoFlush.
IDirect3DVertexDeclaration9* pHardwareVertexDecl = NULL;
// TODO: Use VBO and get rid of the vertexData pointers - with that, we will supply only offsets
/**
 * Builds the D3D vertex declaration matching a decoded vertex format and stores
 * it in the global pHardwareVertexDecl, replacing (and releasing) the one built
 * by the previous call.
 *
 * Elements are emitted in this fixed order, skipping any component whose format
 * is 0 (DEC_NONE): blend weight 0, blend weight 1, texcoord, color 0, color 1,
 * normal, and finally position, which is always emitted.
 *
 * @param program     Currently unused.
 * @param decFmt      Decoded vertex format providing the per-component format ids and offsets.
 * @param vertexData  Currently unused.
 */
static void SetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decFmt, u8 *vertexData) {
	// At most 7 components (2 weights, uv, 2 colors, normal, position) plus the end marker.
	D3DVERTEXELEMENT9 VertexElements[8];
	D3DVERTEXELEMENT9 * VertexElement = &VertexElements[0];

	// Vertex element order
	// WEIGHT
	if (decFmt.w0fmt != 0) {
		VertexAttribSetup(VertexElement, decFmt.w0fmt, decFmt.w0off, D3DDECLUSAGE_BLENDWEIGHT, 0);
		VertexElement++;
	}

	if (decFmt.w1fmt != 0) {
		VertexAttribSetup(VertexElement, decFmt.w1fmt, decFmt.w1off, D3DDECLUSAGE_BLENDWEIGHT, 1);
		VertexElement++;
	}

	// TC
	if (decFmt.uvfmt != 0) {
		VertexAttribSetup(VertexElement, decFmt.uvfmt, decFmt.uvoff, D3DDECLUSAGE_TEXCOORD);
		VertexElement++;
	}

	// COLOR
	if (decFmt.c0fmt != 0) {
		VertexAttribSetup(VertexElement, decFmt.c0fmt, decFmt.c0off, D3DDECLUSAGE_COLOR, 0);
		VertexElement++;
	}

	if (decFmt.c1fmt != 0) {
		VertexAttribSetup(VertexElement, decFmt.c1fmt, decFmt.c1off, D3DDECLUSAGE_COLOR, 1);
		VertexElement++;
	}

	// NORMAL
	if (decFmt.nrmfmt != 0) {
		VertexAttribSetup(VertexElement, decFmt.nrmfmt, decFmt.nrmoff, D3DDECLUSAGE_NORMAL, 0);
		VertexElement++;
	}

	// POSITION
	// Always
	VertexAttribSetup(VertexElement, decFmt.posfmt, decFmt.posoff, D3DDECLUSAGE_POSITION, 0);
	VertexElement++;

	// End
	D3DVERTEXELEMENT9 end = D3DDECL_END();
	memcpy(VertexElement, &end, sizeof(D3DVERTEXELEMENT9));

	// Drop the declaration built by the previous call. Without this, every call
	// leaks one declaration object because the global pointer is simply overwritten.
	// NOTE(review): the old declaration may still be bound on the device here;
	// standard D3D9 keeps its own reference to the bound declaration - confirm
	// this also holds on the target platform.
	if (pHardwareVertexDecl != NULL) {
		pHardwareVertexDecl->Release();
		// Leave the global NULL rather than dangling in case creation below fails.
		pHardwareVertexDecl = NULL;
	}

	// Create declaration
	pD3Ddevice->CreateVertexDeclaration( VertexElements, &pHardwareVertexDecl );
}
// The verts are in the order: BR BL TL TR
static void SwapUVs(TransformedVertex &a, TransformedVertex &b) {
float tempu = a.u;
@ -889,8 +982,9 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
}
}
// Actually again, single quads could be drawn more efficiently using GL_TRIANGLE_STRIP, no need to duplicate verts as for
// GL_TRIANGLES. Still need to sw transform to compute the extra two corners though.
/**
* DirectX can't handle every vertex format, so we need to convert them.
*/
void TransformDrawEngine::MixedTransformAndDraw(int prim, u8 *decoded, LinkedShader *program, int vertexCount, u32 vertType, void *inds,
int indexType, const DecVtxFormat &decVtxFormat, int maxIndex, LPDIRECT3DVERTEXBUFFER9 vb_, LPDIRECT3DINDEXBUFFER9 ib_
) {
@ -931,67 +1025,7 @@ void TransformDrawEngine::MixedTransformAndDraw(int prim, u8 *decoded, LinkedSha
bool drawIndexed = true;
numTrans = vertexCount;
/*
if (prim != GE_PRIM_RECTANGLES) {
// We can simply draw the unexpanded buffer.
numTrans = vertexCount;
drawIndexed = true;
} else {
numTrans = 0;
drawBuffer = transformedExpanded;
TransformedVertex *trans = &transformedExpanded[0];
TransformedVertex saved;
for (int i = 0; i < vertexCount; i += 2) {
int index = ((const u16*)inds)[i];
saved = transformed[index];
int index2 = ((const u16*)inds)[i + 1];
TransformedVertex &transVtx = transformed[index2];
// We have to turn the rectangle into two triangles, so 6 points. Sigh.
// bottom right
trans[0] = transVtx;
// bottom left
trans[1] = transVtx;
trans[1].y = saved.y;
trans[1].v = saved.v;
// top left
trans[2] = transVtx;
trans[2].x = saved.x;
trans[2].y = saved.y;
trans[2].u = saved.u;
trans[2].v = saved.v;
// top right
trans[3] = transVtx;
trans[3].x = saved.x;
trans[3].u = saved.u;
// That's the four corners. Now process UV rotation.
if (throughmode)
RotateUVThrough(trans);
// Apparently, non-through RotateUV just breaks things.
// If we find a game where it helps, we'll just have to figure out how they differ.
// Possibly, it has something to do with flipped viewport Y axis, which a few games use.
// else
// RotateUV(trans);
// bottom right
trans[4] = trans[0];
// top left
trans[5] = trans[2];
trans += 6;
numTrans += 6;
}
}
*/
// TODO: Add a post-transform cache here for multi-RECTANGLES only.
// Might help for text drawing.
// TODO
pD3Ddevice->SetVertexDeclaration(pMixedVertexDecl);
@ -1266,7 +1300,7 @@ void TransformDrawEngine::DoFlush() {
ApplyDrawState(prim);
LinkedShader *program = shaderManager_->ApplyShader(prim);
#if 1 // Not tested !
#if 0 // Colors errors !
if (program->useHWTransform_) {
DecodeVerts();
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
@ -1281,6 +1315,28 @@ void TransformDrawEngine::DoFlush() {
dec_->VertexType(), (void *)decIndex, GE_VTYPE_IDX_16BIT, dec_->GetDecVtxFmt(),
indexGen.MaxIndex(), 0, 0);
} else
#endif
#if 1
if (program->useHWTransform_) {
int vertexCount = 0;
bool useElements = true;
DecodeVerts();
prim = indexGen.Prim();
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
useElements = !indexGen.SeenOnlyPurePrims();
vertexCount = indexGen.VertexCount();
if (!useElements && indexGen.PureCount()) {
vertexCount = indexGen.PureCount();
}
SetupDecFmtForDraw(program, dec_->GetDecVtxFmt(), decoded);
pD3Ddevice->SetVertexDeclaration(pHardwareVertexDecl);
if (useElements) {
pD3Ddevice->DrawIndexedPrimitiveUP(glprim[prim], 0, vertexCount, D3DPrimCount(glprim[prim], vertexCount), decIndex, D3DFMT_INDEX16, decoded, dec_->GetDecVtxFmt().stride);
} else {
pD3Ddevice->DrawPrimitiveUP(glprim[prim], D3DPrimCount(glprim[prim], vertexCount), decoded, dec_->GetDecVtxFmt().stride);
}
} else
#endif
{
DecodeVerts();

View File

@ -107,14 +107,14 @@ void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform) {
}
static const char * const boneWeightAttrDecl[8] = {
"float a_w1;\n",
"float2 a_w1;\n",
"float3 a_w1;\n",
"float4 a_w1;\n",
"float4 a_w1;\n float a_w2;\n",
"float4 a_w1;\n float a_w2;\n",
"float4 a_w1;\n float a_w2;\n",
"float4 a_w1;\n float a_w2;\n",
"float a_w1:BLENDWEIGHT0;\n",
"float2 a_w1:BLENDWEIGHT0;\n",
"float3 a_w1:BLENDWEIGHT0;\n",
"float4 a_w1:BLENDWEIGHT0;\n",
"float4 a_w1:BLENDWEIGHT0;\n float a_w2:BLENDWEIGHT1;\n",
"float4 a_w1:BLENDWEIGHT0;\n float a_w2:BLENDWEIGHT1;\n",
"float4 a_w1:BLENDWEIGHT0;\n float a_w2:BLENDWEIGHT1;\n",
"float4 a_w1:BLENDWEIGHT0;\n float a_w2:BLENDWEIGHT1;\n",
};
enum DoLightComputation {
@ -254,11 +254,17 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
WRITE(p, " struct VS_IN \n");
WRITE(p, " \n");
WRITE(p, " { \n");
WRITE(p, " float4 ObjPos: POSITION; \n");
WRITE(p, " float3 Uv : TEXCOORD0; \n");
WRITE(p, " float3 Normal: NORMAL; \n");
if (gstate.getWeightMask() != GE_VTYPE_WEIGHT_NONE) {
WRITE(p, "%s", boneWeightAttrDecl[TranslateNumBones(gstate.getNumBoneWeights())]);
}
if (doTexture)
WRITE(p, " float3 Uv : TEXCOORD0; \n");
if (hasColor)
WRITE(p, " float4 C1 : COLOR0; \n");
WRITE(p, " float4 C2 : COLOR1; \n");
//WRITE(p, " float4 C2 : COLOR1; \n"); // only software transform supplies color1 as vertex data
if (useHWTransform && hasNormal)
WRITE(p, " float3 Normal: NORMAL; \n");
WRITE(p, " float3 ObjPos: POSITION; \n");
WRITE(p, " }; \n");
WRITE(p, " \n");
WRITE(p, " struct VS_OUT \n");
@ -358,15 +364,15 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
// Uncomment this to screw up bone shaders to check the vertex shader software fallback
// WRITE(p, "THIS SHOULD ERROR! #error");
if (numWeights == 1)
WRITE(p, " float4x4 skinMatrix = a_w1 * u_bone0");
WRITE(p, " float4x4 skinMatrix = In.a_w1 * u_bone0");
else
WRITE(p, " float4x4 skinMatrix = a_w1.x * u_bone0");
WRITE(p, " float4x4 skinMatrix = In.a_w1.x * u_bone0");
for (int i = 1; i < numWeights; i++) {
const char *weightAttr = boneWeightAttr[i];
// workaround for "cant do .x of scalar" issue
if (numWeights == 1 && i == 0) weightAttr = "a_w1";
if (numWeights == 5 && i == 4) weightAttr = "a_w2";
WRITE(p, " + %s * u_bone%i", weightAttr, i);
WRITE(p, " + In.%s * u_bone%i", weightAttr, i);
}
#endif
@ -375,14 +381,14 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
WRITE(p, ";\n");
// Trying to simplify this results in bugs in LBP...
WRITE(p, " float3 skinnedpos = (skinMatrix * float4(In.ObjPos.xyz, 1.0)).xyz %s;\n", factor);
WRITE(p, " float3 worldpos = (u_world * float4(skinnedpos, 1.0)).xyz;\n");
WRITE(p, " float3 skinnedpos = mul(float4(In.ObjPos.xyz, 1.0), skinMatrix).xyz %s;\n", factor);
WRITE(p, " float3 worldpos = mul(float4(skinnedpos, 1.0), u_world).xyz;\n");
if (hasNormal) {
WRITE(p, " float3 skinnednormal = (skinMatrix * float4(In.Normal, 0.0)).xyz %s;\n", factor);
WRITE(p, " float3 worldnormal = normalize((u_world * float4(skinnednormal, 0.0)).xyz);\n");
WRITE(p, " float3 skinnednormal = mul(float4(In.Normal, 0.0), skinMatrix).xyz %s;\n", factor);
WRITE(p, " float3 worldnormal = normalize(mul(float4(skinnednormal, 0.0), u_world).xyz);\n");
} else {
WRITE(p, " float3 worldnormal = (u_world * (skinMatrix * float4(0.0, 0.0, 1.0, 0.0))).xyz;\n");
WRITE(p, " float3 worldnormal = mul( mul( float4(0.0, 0.0, 1.0, 0.0), skinMatrix), u_world).xyz;\n");
}
}

View File

@ -67,7 +67,7 @@ void fbo_unbind() {
pD3Ddevice->Resolve( D3DRESOLVE_RENDERTARGET0|D3DRESOLVE_ALLFRAGMENTS, NULL,
current_fbo->tex, NULL, 0, 0, 0, 0.0f, 0, NULL );
pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, 0xFFFFFFFF, 0, 0);
//pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, 0xFFFFFFFF, 0, 0);
}
current_fbo = NULL;
@ -114,5 +114,5 @@ void SwapBuffer() {
pD3Ddevice->Present(0, 0, 0, 0);
// :s
//pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, 0xFFFFFFFF, 0, 0);
pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, 0xFFFFFFFF, 0, 0);
}