2013-08-17 09:23:51 +00:00
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
# include "base/timeutil.h"
# include "Common/MemoryUtil.h"
# include "Core/MemMap.h"
# include "Core/Host.h"
# include "Core/System.h"
# include "Core/Reporting.h"
# include "Core/Config.h"
# include "Core/CoreTiming.h"
# include "helper/dx_state.h"
# include "native/ext/cityhash/city.h"
2013-08-20 16:47:11 +00:00
# include "ext/xxhash.h"
2013-08-17 09:23:51 +00:00
# include "GPU/Math3D.h"
# include "GPU/GPUState.h"
# include "GPU/ge_constants.h"
# include "StateMapping.h"
# include "TextureCache.h"
# include "TransformPipeline.h"
# include "VertexDecoder.h"
# include "ShaderManager.h"
# include "DisplayListInterpreter.h"
2013-08-20 16:47:11 +00:00
// Vertex declaration matching MixedVertexFormat. Filled in by
// CreateVertexDeclaration() and released in ~TransformDrawEngine().
IDirect3DVertexDeclaration9 * pMixedVertexDecl = NULL ;
// Vertex layout used by MixedTransformAndDraw(). Packed to 1 byte so the
// byte offsets hard-coded in CreateVertexDeclaration() (0, 16, 28, 40, 56)
// line up with the fields below; the total size is 72 bytes.
# pragma pack(push, 1)
struct MixedVertexFormat {
// offset 0: float 3 (declared together with fog as one FLOAT4 POSITION element)
float position [ 3 ] ;
// offset 12: float 1
float fog ;
// offset 16: float 3
float texcoord [ 3 ] ;
// offset 28: float 3
float normal [ 3 ] ;
// offsets 40 and 56: four floats each (declared as D3DDECLTYPE_FLOAT4, not D3DCOLOR)
float color0 [ 4 ] ; // prelit
float color1 [ 4 ] ; // prelit
} ;
# pragma pack(pop)
// CPU-side scratch storage for the mixed vertex path.
// mixedVertices is allocated in the TransformDrawEngine constructor and
// filled by MixedTransformAndDraw(). The remaining pointers are not
// referenced in the code visible here.
static MixedVertexFormat * mixedVertices = NULL ;
static MixedVertexFormat * mixedVertices_ = NULL ;
static u16 * mixedIndices ;
static u16 * mixedIndices_ ;
static void CreateVertexDeclaration ( ) {
const D3DVERTEXELEMENT9 vertexElements [ ] =
{
//{ 0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 },
//{ 0, 12, D3DDECLTYPE_FLOAT1, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_FOG, 0 },
{ 0 , 0 , D3DDECLTYPE_FLOAT4 , D3DDECLMETHOD_DEFAULT , D3DDECLUSAGE_POSITION , 0 } , // merge fog
{ 0 , 16 , D3DDECLTYPE_FLOAT3 , D3DDECLMETHOD_DEFAULT , D3DDECLUSAGE_TEXCOORD , 0 } ,
{ 0 , 28 , D3DDECLTYPE_FLOAT3 , D3DDECLMETHOD_DEFAULT , D3DDECLUSAGE_NORMAL , 0 } ,
{ 0 , 40 , D3DDECLTYPE_FLOAT4 , D3DDECLMETHOD_DEFAULT , D3DDECLUSAGE_COLOR , 0 } ,
{ 0 , 56 , D3DDECLTYPE_FLOAT4 , D3DDECLMETHOD_DEFAULT , D3DDECLUSAGE_COLOR , 1 } ,
D3DDECL_END ( )
} ;
pD3Ddevice - > CreateVertexDeclaration ( vertexElements , & pMixedVertexDecl ) ;
}
2013-08-17 09:23:51 +00:00
// Maps a primitive index (used as glprim[prim] by the draw functions) to the
// D3D primitive type submitted to the device. Only seven entries are listed;
// the eighth slot is value-initialized.
const D3DPRIMITIVETYPE glprim [ 8 ] = {
D3DPT_POINTLIST ,
D3DPT_LINELIST ,
D3DPT_LINESTRIP ,
D3DPT_TRIANGLELIST ,
D3DPT_TRIANGLESTRIP ,
D3DPT_TRIANGLEFAN ,
D3DPT_TRIANGLELIST , // Rectangles are expanded into two triangles each by SoftwareTransformAndDraw(), so they are drawn as a triangle list.
} ;
// Converts a vertex/index count into the primitive count expected by the
// D3D draw calls, using the per-primitive-type D3DPRIMITIVEVERTEXCOUNT table:
// entry [0] is the divisor, entry [1] is subtracted from the quotient.
int D3DPrimCount(D3DPRIMITIVETYPE prim, int size) {
	const int quotient = size / D3DPRIMITIVEVERTEXCOUNT[prim][0];
	return quotient - D3DPRIMITIVEVERTEXCOUNT[prim][1];
}
enum {
DECODED_VERTEX_BUFFER_SIZE = 65536 * 48 ,
DECODED_INDEX_BUFFER_SIZE = 65536 * 20 ,
2013-08-20 16:47:11 +00:00
TRANSFORMED_VERTEX_BUFFER_SIZE = 65536 * sizeof ( TransformedVertex ) ,
MIXED_VERTEX_BUFFER_SIZE = 65536 * sizeof ( MixedVertexFormat )
2013-08-17 09:23:51 +00:00
} ;
2013-08-20 16:47:11 +00:00
// Initial value of TransformDrawEngine::decimationCounter_ (set in the constructor).
# define VERTEXCACHE_DECIMATION_INTERVAL 17
2013-08-17 09:23:51 +00:00
// Limits `in` to the range [min, max]. The lower bound is tested first, so
// it wins if the bounds are inverted.
inline float clamp(float in, float min, float max) {
	if (in < min) {
		return min;
	}
	if (in > max) {
		return max;
	}
	return in;
}
// Sets up the draw engine: resets bookkeeping members, allocates the CPU-side
// scratch buffers, then creates the D3D buffers and the mixed vertex declaration.
TransformDrawEngine::TransformDrawEngine()
	: collectedVerts(0),
	  prevPrim_(-1),
	  dec_(0),
	  lastVType_(-1),
	  curVbo_(0),
	  shaderManager_(0),
	  textureCache_(0),
	  framebufferManager_(0),
	  numDrawCalls(0),
	  uvScale(0) {
	decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;

	// Page-allocated scratch buffers. This adds up to a lot of memory;
	// it would be worth finding a way to shrink it.
	decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE);
	decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE);
	transformed = (TransformedVertex *)AllocateMemoryPages(TRANSFORMED_VERTEX_BUFFER_SIZE);
	// Rectangle expansion emits 6 vertices per 2 input vertices, hence 3x.
	transformedExpanded = (TransformedVertex *)AllocateMemoryPages(3 * TRANSFORMED_VERTEX_BUFFER_SIZE);
	mixedVertices = (MixedVertexFormat *)AllocateMemoryPages(MIXED_VERTEX_BUFFER_SIZE);

	// The per-draw-call UV scale table is only needed when prescaling is on.
	if (g_Config.bPrescaleUV) {
		uvScale = new UVScale[MAX_DEFERRED_DRAW_CALLS];
	}

	indexGen.Setup(decIndex);

	InitDeviceObjects();
	CreateVertexDeclaration();
}
// Releases everything the constructor acquired: D3D buffers, page-allocated
// scratch memory, cached vertex decoders, the UV scale table and the mixed
// vertex declaration.
TransformDrawEngine::~TransformDrawEngine() {
	DestroyDeviceObjects();

	FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
	FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
	FreeMemoryPages(transformed, TRANSFORMED_VERTEX_BUFFER_SIZE);
	FreeMemoryPages(transformedExpanded, 3 * TRANSFORMED_VERTEX_BUFFER_SIZE);
	FreeMemoryPages(mixedVertices, MIXED_VERTEX_BUFFER_SIZE);
	// mixedVertices is a file-level pointer; don't leave it dangling.
	mixedVertices = NULL;

	for (auto iter = decoderMap_.begin(); iter != decoderMap_.end(); ++iter) {
		delete iter->second;
	}
	delete[] uvScale;

	// CreateVertexDeclaration() does not check for failure, so the
	// declaration may never have been created.
	if (pMixedVertexDecl != NULL) {
		pMixedVertexDecl->Release();
		pMixedVertexDecl = NULL;
	}
}
void TransformDrawEngine : : InitDeviceObjects ( ) {
2013-08-20 16:47:11 +00:00
for ( int i = 0 ; i < NUM_VBOS ; i + + ) {
pD3Ddevice - > CreateVertexBuffer ( MIXED_VERTEX_BUFFER_SIZE , NULL , NULL , D3DPOOL_DEFAULT , & vb [ i ] , NULL ) ;
pD3Ddevice - > CreateIndexBuffer ( MIXED_VERTEX_BUFFER_SIZE , NULL , D3DFMT_INDEX16 , D3DPOOL_DEFAULT , & ib [ i ] , NULL ) ;
2013-08-17 09:23:51 +00:00
}
}
void TransformDrawEngine : : DestroyDeviceObjects ( ) {
2013-08-20 16:47:11 +00:00
for ( int i = 0 ; i < NUM_VBOS ; i + + ) {
vb [ i ] - > Release ( ) ;
ib [ i ] - > Release ( ) ;
2013-08-17 09:23:51 +00:00
}
ClearTrackedVertexArrays ( ) ;
}
/*
void TransformDrawEngine : : GLLost ( ) {
// The objects have already been deleted.
memset ( vbo_ , 0 , sizeof ( vbo_ ) ) ;
memset ( ebo_ , 0 , sizeof ( ebo_ ) ) ;
ClearTrackedVertexArrays ( ) ;
InitDeviceObjects ( ) ;
}
*/
// Just to get something on the screen, we'll just not subdivide correctly.
void TransformDrawEngine : : DrawBezier ( int ucount , int vcount ) {
u16 indices [ 3 * 3 * 6 ] ;
Reporting : : ReportMessage ( " Unsupported bezier curve " ) ;
// if (gstate.patchprimitive)
// Generate indices for a rectangular mesh.
int c = 0 ;
for ( int y = 0 ; y < 3 ; y + + ) {
for ( int x = 0 ; x < 3 ; x + + ) {
indices [ c + + ] = y * 4 + x ;
indices [ c + + ] = y * 4 + x + 1 ;
indices [ c + + ] = ( y + 1 ) * 4 + x + 1 ;
indices [ c + + ] = ( y + 1 ) * 4 + x + 1 ;
indices [ c + + ] = ( y + 1 ) * 4 + x ;
indices [ c + + ] = y * 4 + x ;
}
}
// We are free to use the "decoded" buffer here.
// Let's split it into two to get a second buffer, there's enough space.
u8 * decoded2 = decoded + 65536 * 24 ;
// Alright, now for the vertex data.
// For now, we will simply inject UVs.
float customUV [ 4 * 4 * 2 ] ;
for ( int y = 0 ; y < 4 ; y + + ) {
for ( int x = 0 ; x < 4 ; x + + ) {
customUV [ ( y * 4 + x ) * 2 + 0 ] = ( float ) x / 3.0f ;
customUV [ ( y * 4 + x ) * 2 + 1 ] = ( float ) y / 3.0f ;
}
}
if ( ! ( gstate . vertType & GE_VTYPE_TC_MASK ) ) {
VertexDecoder * dec = GetVertexDecoder ( gstate . vertType ) ;
dec - > SetVertexType ( gstate . vertType ) ;
u32 newVertType = dec - > InjectUVs ( decoded2 , Memory : : GetPointer ( gstate_c . vertexAddr ) , customUV , 16 ) ;
SubmitPrim ( decoded2 , & indices [ 0 ] , GE_PRIM_TRIANGLES , c , newVertType , GE_VTYPE_IDX_16BIT , 0 ) ;
} else {
SubmitPrim ( Memory : : GetPointer ( gstate_c . vertexAddr ) , & indices [ 0 ] , GE_PRIM_TRIANGLES , c , gstate . vertType , GE_VTYPE_IDX_16BIT , 0 ) ;
}
Flush ( ) ; // as our vertex storage here is temporary, it will only survive one draw.
}
// Copy code from bezier. This is not right, but allow to display something.
void TransformDrawEngine : : DrawSpline ( int ucount , int vcount , int utype , int vtype ) {
u16 indices [ 3 * 3 * 6 ] ;
Reporting : : ReportMessage ( " Unsupported spline curve " ) ;
// Generate indices for a rectangular mesh.
int c = 0 ;
for ( int y = 0 ; y < 3 ; y + + ) {
for ( int x = 0 ; x < 3 ; x + + ) {
indices [ c + + ] = y * 4 + x ;
indices [ c + + ] = y * 4 + x + 1 ;
indices [ c + + ] = ( y + 1 ) * 4 + x + 1 ;
indices [ c + + ] = ( y + 1 ) * 4 + x + 1 ;
indices [ c + + ] = ( y + 1 ) * 4 + x ;
indices [ c + + ] = y * 4 + x ;
}
}
// We are free to use the "decoded" buffer here.
// Let's split it into two to get a second buffer, there's enough space.
u8 * decoded2 = decoded + 65536 * 24 ;
// Alright, now for the vertex data.
// For now, we will simply inject UVs.
float customUV [ 4 * 4 * 2 ] ;
for ( int y = 0 ; y < 4 ; y + + ) {
for ( int x = 0 ; x < 4 ; x + + ) {
customUV [ ( y * 4 + x ) * 2 + 0 ] = ( float ) x / 3.0f ;
customUV [ ( y * 4 + x ) * 2 + 1 ] = ( float ) y / 3.0f ;
}
}
if ( ! ( gstate . vertType & GE_VTYPE_TC_MASK ) ) {
VertexDecoder * dec = GetVertexDecoder ( gstate . vertType ) ;
dec - > SetVertexType ( gstate . vertType ) ;
u32 newVertType = dec - > InjectUVs ( decoded2 , Memory : : GetPointer ( gstate_c . vertexAddr ) , customUV , 16 ) ;
SubmitPrim ( decoded2 , & indices [ 0 ] , GE_PRIM_TRIANGLES , c , newVertType , GE_VTYPE_IDX_16BIT , 0 ) ;
} else {
SubmitPrim ( Memory : : GetPointer ( gstate_c . vertexAddr ) , & indices [ 0 ] , GE_PRIM_TRIANGLES , c , gstate . vertType , GE_VTYPE_IDX_16BIT , 0 ) ;
}
Flush ( ) ; // as our vertex storage here is temporary, it will only survive one draw.
}
// Convenient way to do precomputation to save the parts of the lighting calculation
// that's common between the many vertices of a draw call.
// Software lighting helper. The constructor snapshots the material and global
// ambient state from gstate once; Light() is then called per vertex.
class Lighter {
public :
Lighter ( ) ;
// Computes the lit primary color (colorOut0) and secondary/specular color
// (colorOut1) for one vertex. colorIn is the vertex color in A, R, G, B order.
void Light ( float colorOut0 [ 4 ] , float colorOut1 [ 4 ] , const float colorIn [ 4 ] , Vec3f pos , Vec3f normal ) ;
private :
// Colors captured from gstate by the constructor.
Color4 globalAmbient ;
Color4 materialEmissive ;
Color4 materialAmbient ;
Color4 materialDiffuse ;
Color4 materialSpecular ;
// Exponent used for specular and powered-diffuse terms.
float specCoef_ ;
// Vec3f viewer_;
// Set by the constructor from gstate.texmapmode; not read by Light().
bool doShadeMapping_ ;
// Bitmask: 1 = vertex color replaces ambient, 2 = diffuse, 4 = specular.
int materialUpdate_ ;
} ;
// Captures the lighting-related material state from gstate so that Light()
// does not have to re-read and convert it for every vertex.
Lighter::Lighter() {
	// Scalar settings.
	materialUpdate_ = gstate.materialupdate & 7;
	specCoef_ = getFloat24(gstate.materialspecularcoef);
	doShadeMapping_ = (gstate.texmapmode & 0x3) == 2;

	// Scene ambient, with its own alpha.
	globalAmbient.GetFromRGB(gstate.ambientcolor);
	globalAmbient.GetFromA(gstate.ambientalpha);

	// Material colors. Only the ambient term carries the material alpha.
	materialAmbient.GetFromRGB(gstate.materialambient);
	materialAmbient.GetFromA(gstate.materialalpha);

	materialEmissive.GetFromRGB(gstate.materialemissive);
	materialEmissive.a = 0.0f;

	materialDiffuse.GetFromRGB(gstate.materialdiffuse);
	materialDiffuse.a = 1.0f;

	materialSpecular.GetFromRGB(gstate.materialspecular);
	materialSpecular.a = 1.0f;
}
void Lighter : : Light ( float colorOut0 [ 4 ] , float colorOut1 [ 4 ] , const float colorIn [ 4 ] , Vec3f pos , Vec3f norm )
{
// Color are in dx format
Color4 in ;
in . a = colorIn [ 0 ] ;
in . r = colorIn [ 1 ] ;
in . g = colorIn [ 2 ] ;
in . b = colorIn [ 3 ] ;
const Color4 * ambient ;
if ( materialUpdate_ & 1 )
ambient = & in ;
else
ambient = & materialAmbient ;
const Color4 * diffuse ;
if ( materialUpdate_ & 2 )
diffuse = & in ;
else
diffuse = & materialDiffuse ;
const Color4 * specular ;
if ( materialUpdate_ & 4 )
specular = & in ;
else
specular = & materialSpecular ;
Color4 lightSum0 = globalAmbient * * ambient + materialEmissive ;
Color4 lightSum1 ( 0 , 0 , 0 , 0 ) ;
for ( int l = 0 ; l < 4 ; l + + )
{
// can we skip this light?
if ( ! gstate . isLightChanEnabled ( l ) )
continue ;
GELightType type = gstate . getLightType ( l ) ;
Vec3f toLight ( 0 , 0 , 0 ) ;
Vec3f lightDir ( 0 , 0 , 0 ) ;
if ( type = = GE_LIGHTTYPE_DIRECTIONAL )
toLight = Vec3f ( gstate_c . lightpos [ l ] ) ; // lightdir is for spotlights
else
toLight = Vec3f ( gstate_c . lightpos [ l ] ) - pos ;
bool doSpecular = gstate . isUsingSpecularLight ( l ) ;
bool poweredDiffuse = gstate . isUsingPoweredDiffuseLight ( l ) ;
float distanceToLight = toLight . Length ( ) ;
float dot = 0.0f ;
float angle = 0.0f ;
float lightScale = 0.0f ;
if ( distanceToLight > 0.0f ) {
toLight / = distanceToLight ;
dot = Dot ( toLight , norm ) ;
}
// Clamp dot to zero.
if ( dot < 0.0f ) dot = 0.0f ;
if ( poweredDiffuse )
dot = powf ( dot , specCoef_ ) ;
// Attenuation
switch ( type ) {
case GE_LIGHTTYPE_DIRECTIONAL :
lightScale = 1.0f ;
break ;
case GE_LIGHTTYPE_POINT :
lightScale = clamp ( 1.0f / ( gstate_c . lightatt [ l ] [ 0 ] + gstate_c . lightatt [ l ] [ 1 ] * distanceToLight + gstate_c . lightatt [ l ] [ 2 ] * distanceToLight * distanceToLight ) , 0.0f , 1.0f ) ;
break ;
case GE_LIGHTTYPE_SPOT :
lightDir = gstate_c . lightdir [ l ] ;
angle = Dot ( toLight . Normalized ( ) , lightDir . Normalized ( ) ) ;
if ( angle > = gstate_c . lightangle [ l ] )
lightScale = clamp ( 1.0f / ( gstate_c . lightatt [ l ] [ 0 ] + gstate_c . lightatt [ l ] [ 1 ] * distanceToLight + gstate_c . lightatt [ l ] [ 2 ] * distanceToLight * distanceToLight ) , 0.0f , 1.0f ) * powf ( angle , gstate_c . lightspotCoef [ l ] ) ;
break ;
default :
// ILLEGAL
break ;
}
Color4 lightDiff ( gstate_c . lightColor [ 1 ] [ l ] , 0.0f ) ;
Color4 diff = ( lightDiff * * diffuse ) * dot ;
// Real PSP specular
Vec3f toViewer ( 0 , 0 , 1 ) ;
// Better specular
// Vec3f toViewer = (viewer - pos).Normalized();
if ( doSpecular )
{
Vec3f halfVec = ( toLight + toViewer ) ;
halfVec . Normalize ( ) ;
dot = Dot ( halfVec , norm ) ;
if ( dot > 0.0f )
{
Color4 lightSpec ( gstate_c . lightColor [ 2 ] [ l ] , 0.0f ) ;
lightSum1 + = ( lightSpec * * specular * ( powf ( dot , specCoef_ ) * lightScale ) ) ;
}
}
if ( gstate . isLightChanEnabled ( l ) )
{
Color4 lightAmbient ( gstate_c . lightColor [ 0 ] [ l ] , 0.0f ) ;
lightSum0 + = ( lightAmbient * * ambient + diff ) * lightScale ;
}
}
// 4?
for ( int i = 0 ; i < 4 ; i + + ) {
colorOut0 [ i ] = lightSum0 [ i ] > 1.0f ? 1.0f : lightSum0 [ i ] ;
colorOut1 [ i ] = lightSum1 [ i ] > 1.0f ? 1.0f : lightSum1 [ i ] ;
}
}
// Describes one vertex attribute component format for the GL-style attribute
// setup. Its only use visible here is inside a disabled #if 0 section.
struct GlTypeInfo {
u16 type ; // component data type constant
u8 count ; // number of components
u8 normalized ; // whether integer data is normalized when fetched
} ;
// Disabled: OpenGL vertex attribute setup, compiled out with #if 0.
#if 0
// Indexed by the DEC_* vertex component format, in the order listed below.
static const GlTypeInfo GLComp [ ] = {
{ 0 } , // DEC_NONE,
{ GL_FLOAT , 1 , GL_FALSE } , // DEC_FLOAT_1,
{ GL_FLOAT , 2 , GL_FALSE } , // DEC_FLOAT_2,
{ GL_FLOAT , 3 , GL_FALSE } , // DEC_FLOAT_3,
{ GL_FLOAT , 4 , GL_FALSE } , // DEC_FLOAT_4,
{ GL_BYTE , 4 , GL_TRUE } , // DEC_S8_3,
{ GL_SHORT , 4 , GL_TRUE } , // DEC_S16_3,
{ GL_UNSIGNED_BYTE , 1 , GL_TRUE } , // DEC_U8_1,
{ GL_UNSIGNED_BYTE , 2 , GL_TRUE } , // DEC_U8_2,
{ GL_UNSIGNED_BYTE , 3 , GL_TRUE } , // DEC_U8_3,
{ GL_UNSIGNED_BYTE , 4 , GL_TRUE } , // DEC_U8_4,
{ GL_UNSIGNED_SHORT , 1 , GL_TRUE } , // DEC_U16_1,
{ GL_UNSIGNED_SHORT , 2 , GL_TRUE } , // DEC_U16_2,
{ GL_UNSIGNED_SHORT , 3 , GL_TRUE } , // DEC_U16_3,
{ GL_UNSIGNED_SHORT , 4 , GL_TRUE } , // DEC_U16_4,
{ GL_UNSIGNED_BYTE , 2 , GL_FALSE } , // DEC_U8A_2,
{ GL_UNSIGNED_SHORT , 2 , GL_FALSE } , // DEC_U16A_2,
} ;
// Binds one attribute pointer; skipped when the attribute location is -1
// or the format is 0 (DEC_NONE).
static inline void VertexAttribSetup ( int attrib , int fmt , int stride , u8 * ptr ) {
if ( attrib ! = - 1 & & fmt ) {
const GlTypeInfo & type = GLComp [ fmt ] ;
glVertexAttribPointer ( attrib , type . count , type . type , type . normalized , stride , ptr ) ;
}
}
// TODO: Use VBO and get rid of the vertexData pointers - with that, we will supply only offsets
// Binds every attribute of the decoded vertex format for the given shader program.
static void SetupDecFmtForDraw ( LinkedShader * program , const DecVtxFormat & decFmt , u8 * vertexData ) {
VertexAttribSetup ( program - > a_weight0123 , decFmt . w0fmt , decFmt . stride , vertexData + decFmt . w0off ) ;
VertexAttribSetup ( program - > a_weight4567 , decFmt . w1fmt , decFmt . stride , vertexData + decFmt . w1off ) ;
VertexAttribSetup ( program - > a_texcoord , decFmt . uvfmt , decFmt . stride , vertexData + decFmt . uvoff ) ;
VertexAttribSetup ( program - > a_color0 , decFmt . c0fmt , decFmt . stride , vertexData + decFmt . c0off ) ;
VertexAttribSetup ( program - > a_color1 , decFmt . c1fmt , decFmt . stride , vertexData + decFmt . c1off ) ;
VertexAttribSetup ( program - > a_normal , decFmt . nrmfmt , decFmt . stride , vertexData + decFmt . nrmoff ) ;
VertexAttribSetup ( program - > a_position , decFmt . posfmt , decFmt . stride , vertexData + decFmt . posoff ) ;
}
# endif
// The verts are in the order: BR BL TL TR
// Exchanges the texture coordinates (u, v) of two vertices; nothing else
// in either vertex is touched.
static void SwapUVs(TransformedVertex &a, TransformedVertex &b) {
	const float firstU = a.u;
	const float firstV = a.v;

	a.u = b.u;
	a.v = b.v;

	b.u = firstU;
	b.v = firstV;
}
// 2 3 3 2 0 3 2 1
// to to or
// 1 0 0 1 1 2 3 0
// See comment below where this was called before.
/*
static void RotateUV ( TransformedVertex v [ 4 ] ) {
float x1 = v [ 2 ] . x ;
float x2 = v [ 0 ] . x ;
float y1 = v [ 2 ] . y ;
float y2 = v [ 0 ] . y ;
if ( ( x1 < x2 & & y1 < y2 ) | | ( x1 > x2 & & y1 > y2 ) )
SwapUVs ( v [ 1 ] , v [ 3 ] ) ;
} */
// Fixes up the UVs of an expanded through-mode rectangle. v[0] and v[2] are
// opposite corners; when exactly one axis is flipped between them, the UVs
// of the two remaining corners (v[1] and v[3]) are exchanged.
static void RotateUVThrough(TransformedVertex v[4]) {
	const float x1 = v[2].x;
	const float y1 = v[2].y;
	const float x2 = v[0].x;
	const float y2 = v[0].y;

	const bool leftAndBelow = x1 < x2 && y1 > y2;
	const bool rightAndAbove = x1 > x2 && y1 < y2;
	if (leftAndBelow || rightAndAbove) {
		SwapUVs(v[1], v[3]);
	}
}
// Clears on the PSP are best done by drawing a series of vertical strips
// in clear mode. This tries to detect that.
bool TransformDrawEngine : : IsReallyAClear ( int numVerts ) const {
if ( transformed [ 0 ] . x ! = 0.0f | | transformed [ 0 ] . y ! = 0.0f )
return false ;
u32 matchcolor ;
memcpy ( & matchcolor , transformed [ 0 ] . color0 , 4 ) ;
float matchz = transformed [ 0 ] . z ;
int bufW = gstate_c . curRTWidth ;
int bufH = gstate_c . curRTHeight ;
float prevX = 0.0f ;
for ( int i = 1 ; i < numVerts ; i + + ) {
u32 vcolor ;
memcpy ( & vcolor , transformed [ i ] . color0 , 4 ) ;
if ( vcolor ! = matchcolor | | transformed [ i ] . z ! = matchz )
return false ;
if ( ( i & 1 ) = = 0 ) {
// Top left of a rectangle
if ( transformed [ i ] . y ! = 0 )
return false ;
if ( i > 0 & & transformed [ i ] . x ! = transformed [ i - 1 ] . x )
return false ;
} else {
// Bottom right
if ( transformed [ i ] . y ! = bufH )
return false ;
if ( transformed [ i ] . x < = transformed [ i - 1 ] . x )
return false ;
}
}
// The last vertical strip often extends outside the drawing area.
if ( transformed [ numVerts - 1 ] . x < bufW )
return false ;
return true ;
}
// This is the software transform pipeline, which is necessary for supporting RECT
// primitives correctly, and may be easier to use for debugging than the hardware
// transform pipeline.
// There's code here that simply expands transformed RECTANGLES into plain triangles.
// We're gonna have to keep software transforming RECTANGLES, unless we use a geom shader which we can't on OpenGL ES 2.0.
// Usually, though, these primitives don't use lighting etc so it's no biggie performance wise, but it would be nice to get rid of
// this code.
// Actually, if we find the camera-relative right and down vectors, it might even be possible to add the extra points in pre-transformed
// space and thus make decent use of hardware transform.
// Actually again, single quads could be drawn more efficiently using GL_TRIANGLE_STRIP, no need to duplicate verts as for
// GL_TRIANGLES. Still need to sw transform to compute the extra two corners though.
void TransformDrawEngine : : SoftwareTransformAndDraw (
int prim , u8 * decoded , LinkedShader * program , int vertexCount , u32 vertType , void * inds , int indexType , const DecVtxFormat & decVtxFormat , int maxIndex ) {
bool throughmode = ( vertType & GE_VTYPE_THROUGH_MASK ) ! = 0 ;
bool lmode = gstate . isUsingSecondaryColor ( ) & & gstate . isLightingEnabled ( ) ;
// TODO: Split up into multiple draw calls for GLES 2.0 where you can't guarantee support for more than 0x10000 verts.
float uscale = 1.0f ;
float vscale = 1.0f ;
if ( throughmode ) {
uscale / = gstate_c . curTextureWidth ;
vscale / = gstate_c . curTextureHeight ;
}
2013-08-20 16:47:11 +00:00
int w = gstate . getTextureWidth ( 0 ) ;
int h = gstate . getTextureHeight ( 0 ) ;
2013-08-17 09:23:51 +00:00
float widthFactor = ( float ) w / ( float ) gstate_c . curTextureWidth ;
float heightFactor = ( float ) h / ( float ) gstate_c . curTextureHeight ;
Lighter lighter ;
float fog_end = getFloat24 ( gstate . fog1 ) ;
float fog_slope = getFloat24 ( gstate . fog2 ) ;
VertexReader reader ( decoded , decVtxFormat , vertType ) ;
for ( int index = 0 ; index < maxIndex ; index + + ) {
reader . Goto ( index ) ;
float v [ 3 ] = { 0 , 0 , 0 } ;
float c0 [ 4 ] = { 1 , 1 , 1 , 1 } ;
float c1 [ 4 ] = { 0 , 0 , 0 , 0 } ;
float uv [ 3 ] = { 0 , 0 , 0 } ;
float fogCoef = 1.0f ;
if ( throughmode ) {
// Do not touch the coordinates or the colors. No lighting.
reader . ReadPos ( v ) ;
if ( reader . hasColor0 ( ) ) {
reader . ReadColor0 ( c0 ) ;
for ( int j = 0 ; j < 4 ; j + + ) {
c1 [ j ] = 0.0f ;
}
} else {
c0 [ 0 ] = gstate . getMaterialAmbientA ( ) / 255.f ;
c0 [ 1 ] = gstate . getMaterialAmbientR ( ) / 255.f ;
c0 [ 2 ] = gstate . getMaterialAmbientG ( ) / 255.f ;
c0 [ 3 ] = gstate . getMaterialAmbientB ( ) / 255.f ;
}
if ( reader . hasUV ( ) ) {
reader . ReadUV ( uv ) ;
uv [ 0 ] * = uscale ;
uv [ 1 ] * = vscale ;
}
fogCoef = 1.0f ;
// Scale UV?
} else {
// We do software T&L for now
float out [ 3 ] , norm [ 3 ] ;
float pos [ 3 ] , nrm [ 3 ] ;
Vec3f normal ( 0 , 0 , 1 ) ;
reader . ReadPos ( pos ) ;
if ( reader . hasNormal ( ) )
reader . ReadNrm ( nrm ) ;
if ( ( vertType & GE_VTYPE_WEIGHT_MASK ) = = GE_VTYPE_WEIGHT_NONE ) {
Vec3ByMatrix43 ( out , pos , gstate . worldMatrix ) ;
if ( reader . hasNormal ( ) ) {
Norm3ByMatrix43 ( norm , nrm , gstate . worldMatrix ) ;
normal = Vec3f ( norm ) . Normalized ( ) ;
}
} else {
float weights [ 8 ] ;
reader . ReadWeights ( weights ) ;
// Skinning
Vec3f psum ( 0 , 0 , 0 ) ;
Vec3f nsum ( 0 , 0 , 0 ) ;
int nweights = ( ( vertType & GE_VTYPE_WEIGHTCOUNT_MASK ) > > GE_VTYPE_WEIGHTCOUNT_SHIFT ) + 1 ;
for ( int i = 0 ; i < nweights ; i + + )
{
if ( weights [ i ] ! = 0.0f ) {
Vec3ByMatrix43 ( out , pos , gstate . boneMatrix + i * 12 ) ;
Vec3f tpos ( out ) ;
psum + = tpos * weights [ i ] ;
if ( reader . hasNormal ( ) ) {
Norm3ByMatrix43 ( norm , nrm , gstate . boneMatrix + i * 12 ) ;
Vec3f tnorm ( norm ) ;
nsum + = tnorm * weights [ i ] ;
}
}
}
// Yes, we really must multiply by the world matrix too.
Vec3ByMatrix43 ( out , psum . AsArray ( ) , gstate . worldMatrix ) ;
if ( reader . hasNormal ( ) ) {
Norm3ByMatrix43 ( norm , nsum . AsArray ( ) , gstate . worldMatrix ) ;
normal = Vec3f ( norm ) . Normalized ( ) ;
}
}
// Perform lighting here if enabled. don't need to check through, it's checked above.
float unlitColor [ 4 ] = { 1 , 1 , 1 , 1 } ;
if ( reader . hasColor0 ( ) ) {
reader . ReadColor0 ( unlitColor ) ;
} else {
unlitColor [ 0 ] = gstate . getMaterialAmbientA ( ) / 255.f ;
unlitColor [ 1 ] = gstate . getMaterialAmbientR ( ) / 255.f ;
unlitColor [ 2 ] = gstate . getMaterialAmbientG ( ) / 255.f ;
unlitColor [ 3 ] = gstate . getMaterialAmbientB ( ) / 255.f ;
}
float litColor0 [ 4 ] ;
float litColor1 [ 4 ] ;
lighter . Light ( litColor0 , litColor1 , unlitColor , out , normal ) ;
if ( gstate . isLightingEnabled ( ) ) {
// Don't ignore gstate.lmode - we should send two colors in that case
for ( int j = 0 ; j < 4 ; j + + ) {
c0 [ j ] = litColor0 [ j ] ;
}
if ( lmode ) {
// Separate colors
for ( int j = 0 ; j < 4 ; j + + ) {
c1 [ j ] = litColor1 [ j ] ;
}
} else {
// Summed color into c0
for ( int j = 0 ; j < 4 ; j + + ) {
c0 [ j ] = ( ( c0 [ j ] + litColor1 [ j ] ) > 1.0f ) ? 1.0f : ( c0 [ j ] + litColor1 [ j ] ) ;
}
}
} else {
if ( reader . hasColor0 ( ) ) {
for ( int j = 0 ; j < 4 ; j + + ) {
c0 [ j ] = unlitColor [ j ] ;
}
} else {
c0 [ 0 ] = gstate . getMaterialAmbientA ( ) / 255.f ;
c0 [ 1 ] = gstate . getMaterialAmbientR ( ) / 255.f ;
c0 [ 2 ] = gstate . getMaterialAmbientG ( ) / 255.f ;
c0 [ 3 ] = gstate . getMaterialAmbientB ( ) / 255.f ;
}
if ( lmode ) {
for ( int j = 0 ; j < 4 ; j + + ) {
c1 [ j ] = 0.0f ;
}
}
}
float ruv [ 2 ] = { 0.0f , 0.0f } ;
if ( reader . hasUV ( ) )
reader . ReadUV ( ruv ) ;
// Perform texture coordinate generation after the transform and lighting - one style of UV depends on lights.
switch ( gstate . getUVGenMode ( ) )
{
case 0 : // UV mapping
// Texture scale/offset is only performed in this mode.
uv [ 0 ] = uscale * ( ruv [ 0 ] * gstate_c . uv . uScale + gstate_c . uv . uOff ) ;
uv [ 1 ] = vscale * ( ruv [ 1 ] * gstate_c . uv . vScale + gstate_c . uv . vOff ) ;
uv [ 2 ] = 1.0f ;
break ;
case 1 :
{
// Projection mapping
Vec3f source ;
switch ( gstate . getUVProjMode ( ) )
{
case 0 : // Use model space XYZ as source
source = pos ;
break ;
case 1 : // Use unscaled UV as source
source = Vec3f ( ruv [ 0 ] , ruv [ 1 ] , 0.0f ) ;
break ;
case 2 : // Use normalized normal as source
if ( reader . hasNormal ( ) ) {
source = Vec3f ( norm ) . Normalized ( ) ;
} else {
ERROR_LOG_REPORT ( G3D , " Normal projection mapping without normal? " ) ;
source = Vec3f : : AssignToAll ( 0.0f ) ;
}
break ;
case 3 : // Use non-normalized normal as source!
if ( reader . hasNormal ( ) ) {
source = Vec3f ( norm ) ;
} else {
ERROR_LOG_REPORT ( G3D , " Normal projection mapping without normal? " ) ;
source = Vec3f : : AssignToAll ( 0.0f ) ;
}
break ;
}
float uvw [ 3 ] ;
Vec3ByMatrix43 ( uvw , & source . x , gstate . tgenMatrix ) ;
uv [ 0 ] = uvw [ 0 ] ;
uv [ 1 ] = uvw [ 1 ] ;
uv [ 2 ] = uvw [ 2 ] ;
}
break ;
case 2 :
// Shade mapping - use two light sources to generate U and V.
{
Vec3f lightpos0 = Vec3f ( gstate_c . lightpos [ gstate . getUVLS0 ( ) ] ) . Normalized ( ) ;
Vec3f lightpos1 = Vec3f ( gstate_c . lightpos [ gstate . getUVLS1 ( ) ] ) . Normalized ( ) ;
uv [ 0 ] = ( 1.0f + Dot ( lightpos0 , normal ) ) / 2.0f ;
uv [ 1 ] = ( 1.0f - Dot ( lightpos1 , normal ) ) / 2.0f ;
uv [ 2 ] = 1.0f ;
}
break ;
default :
// Illegal
break ;
}
uv [ 0 ] = uv [ 0 ] * widthFactor ;
uv [ 1 ] = uv [ 1 ] * heightFactor ;
// Transform the coord by the view matrix.
Vec3ByMatrix43 ( v , out , gstate . viewMatrix ) ;
fogCoef = ( v [ 2 ] + fog_end ) * fog_slope ;
}
// TODO: Write to a flexible buffer, we don't always need all four components.
memcpy ( & transformed [ index ] . x , v , 3 * sizeof ( float ) ) ;
transformed [ index ] . fog = fogCoef ;
memcpy ( & transformed [ index ] . u , uv , 3 * sizeof ( float ) ) ;
if ( gstate_c . flipTexture ) {
transformed [ index ] . v = 1.0f - transformed [ index ] . v ;
}
for ( int i = 0 ; i < 4 ; i + + ) {
transformed [ index ] . color0 [ i ] = c0 [ i ] * 255.0f ;
}
for ( int i = 0 ; i < 3 ; i + + ) {
transformed [ index ] . color1 [ i ] = c1 [ i ] * 255.0f ;
}
}
// Step 2: expand rectangles.
const TransformedVertex * drawBuffer = transformed ;
int numTrans = 0 ;
bool drawIndexed = false ;
if ( prim ! = GE_PRIM_RECTANGLES ) {
// We can simply draw the unexpanded buffer.
numTrans = vertexCount ;
drawIndexed = true ;
} else {
numTrans = 0 ;
drawBuffer = transformedExpanded ;
TransformedVertex * trans = & transformedExpanded [ 0 ] ;
TransformedVertex saved ;
for ( int i = 0 ; i < vertexCount ; i + = 2 ) {
int index = ( ( const u16 * ) inds ) [ i ] ;
saved = transformed [ index ] ;
int index2 = ( ( const u16 * ) inds ) [ i + 1 ] ;
TransformedVertex & transVtx = transformed [ index2 ] ;
// We have to turn the rectangle into two triangles, so 6 points. Sigh.
// bottom right
trans [ 0 ] = transVtx ;
// bottom left
trans [ 1 ] = transVtx ;
trans [ 1 ] . y = saved . y ;
trans [ 1 ] . v = saved . v ;
// top left
trans [ 2 ] = transVtx ;
trans [ 2 ] . x = saved . x ;
trans [ 2 ] . y = saved . y ;
trans [ 2 ] . u = saved . u ;
trans [ 2 ] . v = saved . v ;
// top right
trans [ 3 ] = transVtx ;
trans [ 3 ] . x = saved . x ;
trans [ 3 ] . u = saved . u ;
// That's the four corners. Now process UV rotation.
if ( throughmode )
RotateUVThrough ( trans ) ;
// Apparently, non-through RotateUV just breaks things.
// If we find a game where it helps, we'll just have to figure out how they differ.
// Possibly, it has something to do with flipped viewport Y axis, which a few games use.
// else
// RotateUV(trans);
// bottom right
trans [ 4 ] = trans [ 0 ] ;
// top left
trans [ 5 ] = trans [ 2 ] ;
trans + = 6 ;
numTrans + = 6 ;
}
}
// TODO: Add a post-transform cache here for multi-RECTANGLES only.
// Might help for text drawing.
// these spam the gDebugger log.
const int vertexSize = sizeof ( transformed [ 0 ] ) ;
pD3Ddevice - > SetVertexDeclaration ( pSoftVertexDecl ) ;
/// Debug !!
//pD3Ddevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_WIREFRAME);
if ( drawIndexed ) {
pD3Ddevice - > DrawIndexedPrimitiveUP ( glprim [ prim ] , 0 , vertexCount , D3DPrimCount ( glprim [ prim ] , numTrans ) , inds , D3DFMT_INDEX16 , drawBuffer , sizeof ( TransformedVertex ) ) ;
} else {
pD3Ddevice - > DrawPrimitiveUP ( glprim [ prim ] , D3DPrimCount ( glprim [ prim ] , numTrans ) , drawBuffer , sizeof ( TransformedVertex ) ) ;
}
}
2013-08-20 16:47:11 +00:00
// Actually again, single quads could be drawn more efficiently using GL_TRIANGLE_STRIP, no need to duplicate verts as for
// GL_TRIANGLES. Still need to sw transform to compute the extra two corners though.
void TransformDrawEngine : : MixedTransformAndDraw ( int prim , u8 * decoded , LinkedShader * program , int vertexCount , u32 vertType , void * inds ,
int indexType , const DecVtxFormat & decVtxFormat , int maxIndex , LPDIRECT3DVERTEXBUFFER9 vb_ , LPDIRECT3DINDEXBUFFER9 ib_
) {
#if 0
// Get vertices/indices pointer
vb_ - > Lock ( 0 , vertexCount * sizeof ( MixedVertexFormat ) , ( void * * ) & mixedVertices , D3DLOCK_NOOVERWRITE ) ;
ib_ - > Lock ( 0 , maxIndex * sizeof ( short ) , ( void * * ) & mixedIndices , D3DLOCK_NOOVERWRITE ) ;
# endif
MixedVertexFormat * _mixedVertices = mixedVertices ;
VertexReader reader ( decoded , decVtxFormat , vertType ) ;
for ( int index = 0 ; index < maxIndex ; index + + ) {
reader . Goto ( index ) ;
_mixedVertices - > fog = 1.0f ;
reader . ReadPos ( _mixedVertices - > position ) ;
if ( reader . hasUV ( ) )
reader . ReadUV ( _mixedVertices - > texcoord ) ;
if ( reader . hasNormal ( ) )
reader . ReadNrm ( _mixedVertices - > normal ) ;
if ( reader . hasColor0 ( ) )
reader . ReadColor0 ( _mixedVertices - > color0 ) ;
if ( reader . hasColor1 ( ) )
reader . ReadColor0 ( _mixedVertices - > color1 ) ;
_mixedVertices + + ;
}
// Step 2: expand rectangles.
int numTrans = 0 ;
bool drawIndexed = true ;
numTrans = vertexCount ;
/*
if ( prim ! = GE_PRIM_RECTANGLES ) {
// We can simply draw the unexpanded buffer.
numTrans = vertexCount ;
drawIndexed = true ;
} else {
numTrans = 0 ;
drawBuffer = transformedExpanded ;
TransformedVertex * trans = & transformedExpanded [ 0 ] ;
TransformedVertex saved ;
for ( int i = 0 ; i < vertexCount ; i + = 2 ) {
int index = ( ( const u16 * ) inds ) [ i ] ;
saved = transformed [ index ] ;
int index2 = ( ( const u16 * ) inds ) [ i + 1 ] ;
TransformedVertex & transVtx = transformed [ index2 ] ;
// We have to turn the rectangle into two triangles, so 6 points. Sigh.
// bottom right
trans [ 0 ] = transVtx ;
// bottom left
trans [ 1 ] = transVtx ;
trans [ 1 ] . y = saved . y ;
trans [ 1 ] . v = saved . v ;
// top left
trans [ 2 ] = transVtx ;
trans [ 2 ] . x = saved . x ;
trans [ 2 ] . y = saved . y ;
trans [ 2 ] . u = saved . u ;
trans [ 2 ] . v = saved . v ;
// top right
trans [ 3 ] = transVtx ;
trans [ 3 ] . x = saved . x ;
trans [ 3 ] . u = saved . u ;
// That's the four corners. Now process UV rotation.
if ( throughmode )
RotateUVThrough ( trans ) ;
// Apparently, non-through RotateUV just breaks things.
// If we find a game where it helps, we'll just have to figure out how they differ.
// Possibly, it has something to do with flipped viewport Y axis, which a few games use.
// else
// RotateUV(trans);
// bottom right
trans [ 4 ] = trans [ 0 ] ;
// top left
trans [ 5 ] = trans [ 2 ] ;
trans + = 6 ;
numTrans + = 6 ;
}
}
*/
// TODO: Add a post-transform cache here for multi-RECTANGLES only.
// Might help for text drawing.
pD3Ddevice - > SetVertexDeclaration ( pMixedVertexDecl ) ;
/// Debug !!
//pD3Ddevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_WIREFRAME);
#if 0
vb_ - > Unlock ( ) ;
ib_ - > Unlock ( ) ;
# endif
if ( drawIndexed ) {
pD3Ddevice - > DrawIndexedPrimitiveUP ( glprim [ prim ] , 0 , vertexCount , D3DPrimCount ( glprim [ prim ] , numTrans ) , inds , D3DFMT_INDEX16 , mixedVertices , sizeof ( MixedVertexFormat ) ) ;
//pD3Ddevice->SetStreamSource(0, vb_, 0, sizeof(MixedVertexFormat));
//pD3Ddevice->SetIndices(ib_);
//pD3Ddevice->DrawIndexedPrimitive(glprim[prim], 0, 0, 0, 0, D3DPrimCount(glprim[prim], numTrans));
} else {
//pD3Ddevice->DrawPrimitiveUP(glprim[prim], D3DPrimCount(glprim[prim], numTrans), drawBuffer, sizeof(MixedVertexFormat));
}
}
2013-08-17 09:23:51 +00:00
// Returns the VertexDecoder registered in decoderMap_ for the given vertex type.
// If none exists yet, a new decoder is created, configured with vtype, stored in
// decoderMap_ and returned.
VertexDecoder *TransformDrawEngine::GetVertexDecoder(u32 vtype) {
	auto found = decoderMap_.find(vtype);
	if (found == decoderMap_.end()) {
		// First time this vertex type is requested: build a decoder and remember it.
		VertexDecoder *created = new VertexDecoder();
		created->SetVertexType(vtype);
		decoderMap_[vtype] = created;
		return created;
	}
	return found->second;
}
void TransformDrawEngine : : SetupVertexDecoder ( u32 vertType ) {
// If vtype has changed, setup the vertex decoder.
// TODO: Simply cache the setup decoders instead.
if ( vertType ! = lastVType_ ) {
dec_ = GetVertexDecoder ( vertType ) ;
lastVType_ = vertType ;
}
}
int TransformDrawEngine : : EstimatePerVertexCost ( ) {
// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
// runs in parallel with transform.
// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.
// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
// went too fast and starts doing all the work over again).
int cost = 20 ;
if ( gstate . isLightingEnabled ( ) ) {
cost + = 10 ;
}
for ( int i = 0 ; i < 4 ; i + + ) {
if ( gstate . isLightChanEnabled ( i ) )
cost + = 10 ;
}
if ( gstate . getUVGenMode ( ) ! = 0 ) {
cost + = 20 ;
}
if ( dec_ & & dec_ - > morphcount > 1 ) {
cost + = 5 * dec_ - > morphcount ;
}
return cost ;
}
void TransformDrawEngine : : SubmitPrim ( void * verts , void * inds , int prim , int vertexCount , u32 vertType , int forceIndexType , int * bytesRead ) {
if ( vertexCount = = 0 )
return ; // we ignore zero-sized draw calls.
if ( ! indexGen . PrimCompatible ( prevPrim_ , prim ) | | numDrawCalls > = MAX_DEFERRED_DRAW_CALLS )
Flush ( ) ;
prevPrim_ = prim ;
SetupVertexDecoder ( vertType ) ;
dec_ - > IncrementStat ( STAT_VERTSSUBMITTED , vertexCount ) ;
if ( bytesRead )
* bytesRead = vertexCount * dec_ - > VertexSize ( ) ;
gpuStats . numDrawCalls + + ;
gpuStats . numVertsSubmitted + = vertexCount ;
DeferredDrawCall & dc = drawCalls [ numDrawCalls ] ;
dc . verts = verts ;
dc . inds = inds ;
dc . vertType = vertType ;
dc . indexType = ( ( forceIndexType = = - 1 ) ? ( vertType & GE_VTYPE_IDX_MASK ) : forceIndexType ) > > GE_VTYPE_IDX_SHIFT ;
dc . prim = prim ;
dc . vertexCount = vertexCount ;
if ( inds ) {
GetIndexBounds ( inds , vertexCount , vertType , & dc . indexLowerBound , & dc . indexUpperBound ) ;
} else {
dc . indexLowerBound = 0 ;
dc . indexUpperBound = vertexCount - 1 ;
}
if ( uvScale ) {
uvScale [ numDrawCalls ] = gstate_c . uv ;
}
numDrawCalls + + ;
}
void TransformDrawEngine : : DecodeVerts ( ) {
for ( int i = 0 ; i < numDrawCalls ; i + + ) {
const DeferredDrawCall & dc = drawCalls [ i ] ;
indexGen . SetIndex ( collectedVerts ) ;
int indexLowerBound = dc . indexLowerBound , indexUpperBound = dc . indexUpperBound ;
u32 indexType = dc . indexType ;
void * inds = dc . inds ;
if ( indexType = = GE_VTYPE_IDX_NONE > > GE_VTYPE_IDX_SHIFT ) {
// Decode the verts and apply morphing. Simple.
if ( uvScale )
gstate_c . uv = uvScale [ i ] ;
dec_ - > DecodeVerts ( decoded + collectedVerts * ( int ) dec_ - > GetDecVtxFmt ( ) . stride ,
dc . verts , indexLowerBound , indexUpperBound ) ;
collectedVerts + = indexUpperBound - indexLowerBound + 1 ;
indexGen . AddPrim ( dc . prim , dc . vertexCount ) ;
} else {
// It's fairly common that games issue long sequences of PRIM calls, with differing
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
// these as much as possible, so we make sure here to combine as many as possible
// into one nice big drawcall, sharing data.
// 1. Look ahead to find the max index, only looking as "matching" drawcalls.
// Expand the lower and upper bounds as we go.
int j = i + 1 ;
int lastMatch = i ;
while ( j < numDrawCalls ) {
if ( drawCalls [ j ] . verts ! = dc . verts )
break ;
if ( uvScale & & memcmp ( & uvScale [ j ] , & uvScale [ i ] , sizeof ( uvScale [ 0 ] ) ! = 0 ) )
break ;
indexLowerBound = std : : min ( indexLowerBound , ( int ) drawCalls [ j ] . indexLowerBound ) ;
indexUpperBound = std : : max ( indexUpperBound , ( int ) drawCalls [ j ] . indexUpperBound ) ;
lastMatch = j ;
j + + ;
}
// 2. Loop through the drawcalls, translating indices as we go.
for ( j = i ; j < = lastMatch ; j + + ) {
switch ( indexType ) {
case GE_VTYPE_IDX_8BIT > > GE_VTYPE_IDX_SHIFT :
indexGen . TranslatePrim ( drawCalls [ j ] . prim , drawCalls [ j ] . vertexCount , ( const u8 * ) drawCalls [ j ] . inds , indexLowerBound ) ;
break ;
case GE_VTYPE_IDX_16BIT > > GE_VTYPE_IDX_SHIFT :
indexGen . TranslatePrim ( drawCalls [ j ] . prim , drawCalls [ j ] . vertexCount , ( const u16 * ) drawCalls [ j ] . inds , indexLowerBound ) ;
break ;
}
}
int vertexCount = indexUpperBound - indexLowerBound + 1 ;
// 3. Decode that range of vertex data.
if ( uvScale )
gstate_c . uv = uvScale [ i ] ;
dec_ - > DecodeVerts ( decoded + collectedVerts * ( int ) dec_ - > GetDecVtxFmt ( ) . stride ,
dc . verts , indexLowerBound , indexUpperBound ) ;
collectedVerts + = vertexCount ;
// 4. Advance indexgen vertex counter.
indexGen . Advance ( vertexCount ) ;
i = lastMatch ;
}
}
// Sanity check
if ( indexGen . Prim ( ) < 0 ) {
ERROR_LOG ( HLE , " DecodeVerts: Failed to deduce prim: %i " , indexGen . Prim ( ) ) ;
// Force to points (0)
indexGen . AddPrim ( GE_PRIM_POINTS , 0 ) ;
}
}
// Returns the sum of XXH32 hashes over the data referenced by all queued draw calls:
// the vertex bytes for non-indexed calls, and both the referenced vertex range and the
// index bytes for indexed calls.
u32 TransformDrawEngine::ComputeHash() {
	// NOTE(review): this is the decoded-format stride, yet it is applied to the
	// source vertex pointers below — confirm this is the intended size.
	const int vertexSize = dec_->GetDecVtxFmt().stride;
	u32 fullhash = 0;

	// TODO: Add some caps both for numDrawCalls and num verts to check?
	// It is really very expensive to check all the vertex data so often.
	for (int i = 0; i < numDrawCalls; i++) {
		const DeferredDrawCall &dc = drawCalls[i];
		if (dc.inds) {
			// This could get seriously expensive with sparse indices. Need to combine hashing ranges the same way
			// we do when drawing.
			// NOTE(review): SubmitPrim stores indexUpperBound as an inclusive bound
			// (vertexCount - 1 for non-indexed draws), so this length appears to leave
			// the last vertex of the range unhashed — confirm whether that is intended.
			const char *rangeStart = (const char *)dc.verts + vertexSize * dc.indexLowerBound;
			fullhash += XXH32(rangeStart, vertexSize * (dc.indexUpperBound - dc.indexLowerBound), 0x029F3EE1);

			int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
			fullhash += XXH32((const char *)dc.inds, indexSize * dc.vertexCount, 0x955FD1CA);
		} else {
			fullhash += XXH32((const char *)dc.verts, vertexSize * dc.vertexCount, 0x1DE8CAC4);
		}
	}

	return fullhash;
}
// Returns a cheap identifier for the queued draw calls. It mixes only the per-call
// parameters (vertex pointer, index pointer, vertex type, vertex count, primitive),
// not the data the pointers refer to.
u32 TransformDrawEngine::ComputeFastDCID() {
	u32 hash = 0;
	for (int i = 0; i < numDrawCalls; i++) {
		const DeferredDrawCall &dc = drawCalls[i];
		// XOR each field in, rotating by 13 bits between fields.
		hash = __rotl((hash ^ (u32)(uintptr_t)dc.verts), 13);
		hash = __rotl((hash ^ (u32)(uintptr_t)dc.inds), 13);
		hash = __rotl((hash ^ (u32)dc.vertType), 13);
		hash = __rotl((hash ^ (u32)dc.vertexCount), 13);
		// No rotation after the last field of a draw call.
		hash ^= (u32)dc.prim;
	}
	return hash;
}
// Age limit, measured in flips: DecimateTrackedVertexArrays deletes tracked vertex
// arrays whose lastFrame is lower than gpuStats.numFlips minus this value.
enum { VAI_KILL_AGE = 120 } ;
void TransformDrawEngine : : ClearTrackedVertexArrays ( ) {
for ( auto vai = vai_ . begin ( ) ; vai ! = vai_ . end ( ) ; vai + + ) {
delete vai - > second ;
}
vai_ . clear ( ) ;
}
void TransformDrawEngine : : DecimateTrackedVertexArrays ( ) {
2013-08-20 16:47:11 +00:00
if ( - - decimationCounter_ < = 0 ) {
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL ;
} else {
return ;
}
2013-08-17 13:11:27 +00:00
int threshold = gpuStats . numFlips - VAI_KILL_AGE ;
2013-08-17 09:23:51 +00:00
for ( auto iter = vai_ . begin ( ) ; iter ! = vai_ . end ( ) ; ) {
if ( iter - > second - > lastFrame < threshold ) {
delete iter - > second ;
vai_ . erase ( iter + + ) ;
}
else
+ + iter ;
}
// Enable if you want to see vertex decoders in the log output. Need a better way.
#if 0
char buffer [ 16384 ] ;
for ( std : : map < u32 , VertexDecoder * > : : iterator dec = decoderMap_ . begin ( ) ; dec ! = decoderMap_ . end ( ) ; + + dec ) {
char * ptr = buffer ;
ptr + = dec - > second - > ToString ( ptr ) ;
// *ptr++ = '\n';
NOTICE_LOG ( HLE , buffer ) ;
}
# endif
}
// Releases the vbo and ebo objects held by this entry, when they are set.
VertexArrayInfo::~VertexArrayInfo() {
	if (vbo)
		vbo->Release();
	if (ebo)
		ebo->Release();
}
2013-08-20 16:47:11 +00:00
void TransformDrawEngine : : DoFlush ( ) {
2013-08-17 09:23:51 +00:00
gpuStats . numFlushes + + ;
gpuStats . numTrackedVertexArrays = ( int ) vai_ . size ( ) ;
2013-08-20 16:47:11 +00:00
// This is not done on every drawcall, we should collect vertex data
2013-08-17 09:23:51 +00:00
// until critical state changes. That's when we draw (flush).
int prim = prevPrim_ ;
ApplyDrawState ( prim ) ;
LinkedShader * program = shaderManager_ - > ApplyShader ( prim ) ;
2013-08-20 16:47:11 +00:00
# if 1 // Not tested !
2013-08-17 09:23:51 +00:00
if ( program - > useHWTransform_ ) {
DecodeVerts ( ) ;
gpuStats . numUncachedVertsDrawn + = indexGen . VertexCount ( ) ;
prim = indexGen . Prim ( ) ;
2013-08-20 16:47:11 +00:00
// Undo the strip optimization, not supported by the SW code yet.
if ( prim = = GE_PRIM_TRIANGLE_STRIP )
prim = GE_PRIM_TRIANGLES ;
DEBUG_LOG ( G3D , " Flush prim %i SW! %i verts in one go " , prim , indexGen . VertexCount ( ) ) ;
MixedTransformAndDraw (
prim , decoded , program , indexGen . VertexCount ( ) ,
dec_ - > VertexType ( ) , ( void * ) decIndex , GE_VTYPE_IDX_16BIT , dec_ - > GetDecVtxFmt ( ) ,
indexGen . MaxIndex ( ) , 0 , 0 ) ;
2013-08-17 09:23:51 +00:00
} else
# endif
{
DecodeVerts ( ) ;
gpuStats . numUncachedVertsDrawn + = indexGen . VertexCount ( ) ;
prim = indexGen . Prim ( ) ;
// Undo the strip optimization, not supported by the SW code yet.
if ( prim = = GE_PRIM_TRIANGLE_STRIP )
prim = GE_PRIM_TRIANGLES ;
DEBUG_LOG ( G3D , " Flush prim %i SW! %i verts in one go " , prim , indexGen . VertexCount ( ) ) ;
SoftwareTransformAndDraw (
prim , decoded , program , indexGen . VertexCount ( ) ,
dec_ - > VertexType ( ) , ( void * ) decIndex , GE_VTYPE_IDX_16BIT , dec_ - > GetDecVtxFmt ( ) ,
indexGen . MaxIndex ( ) ) ;
}
indexGen . Reset ( ) ;
collectedVerts = 0 ;
numDrawCalls = 0 ;
prevPrim_ = - 1 ;
}