2013-08-17 09:23:51 +00:00
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
2013-11-15 13:24:25 +00:00
// Ideas for speeding things up on mobile OpenGL ES implementations
//
// Use superbuffers! Yes I just invented that name.
//
// The idea is to avoid respecifying the vertex format between every draw call (multiple glVertexAttribPointer ...)
// by combining the contents of multiple draw calls into one buffer, as long as
// they have exactly the same output vertex format. (different input formats is fine! This way
// we can combine the data for multiple draws with different numbers of bones, as we consider numbones < 4 to be = 4)
// into one VBO.
//
// This will likely be a win because I believe that between every change of VBO + glVertexAttribPointer*N, the driver will
// perform a lot of validation, probably at draw call time, while all the validation can be skipped if the only thing
// that changes between two draw calls is simple state or texture or a matrix etc, not anything vertex related.
// Also the driver will have to manage hundreds instead of thousands of VBOs in games like GTA.
//
// * Every 10 frames or something, do the following:
// - Frame 1:
// + Mark all drawn buffers with in-frame sequence numbers (alternatively,
// just log them in an array)
// - Frame 2 (beginning?):
// + Take adjacent buffers that have the same output vertex format, and add them
// to a list of buffers to combine. Create said buffers with appropriate sizes
// and precompute the offsets that the draws should be written into.
// - Frame 2 (end):
// + Actually do the work of combining the buffers. This probably means re-decoding
// the vertices into a new one. Will also have to apply index offsets.
//
// Also need to change the drawing code so that we don't glBindBuffer and respecify glVAP if
// two subsequent drawcalls come from the same superbuffer.
//
// Or we ignore all of this including vertex caching and simply find a way to do highly optimized vertex streaming,
// like Dolphin is trying to. That will likely never be able to reach the same speed as perfectly optimized
// superbuffers though. For this we will have to JIT the vertex decoder but that's not too hard.
//
// Now, when do we delete superbuffers? Maybe when half the buffers within have been killed?
//
// Another idea for GTA which switches textures a lot while not changing much other state is to use ES 3 Array
// textures, if they are the same size (even if they aren't, might be okay to simply resize the textures to match
// if they're just a multiple of 2 away) or something. Then we'd have to add a W texture coordinate to choose the
// texture within the bound texture array to the vertex data when merging into superbuffers.
//
// There are even more things to try. For games that do matrix palette skinning by quickly switching bones and
// just drawing a few triangles per call (NBA, FF:CC, Tekken 6 etc) we could even collect matrices, upload them
// all at once, writing matrix indices into the vertices in addition to the weights, and then doing a single
// draw call with specially generated shader to draw the whole mesh. This code will be seriously complex though.
# include "base/logging.h"
2013-08-17 09:23:51 +00:00
# include "base/timeutil.h"
# include "Common/MemoryUtil.h"
# include "Core/MemMap.h"
# include "Core/Host.h"
# include "Core/System.h"
# include "Core/Reporting.h"
# include "Core/Config.h"
# include "Core/CoreTiming.h"
# include "helper/dx_state.h"
# include "GPU/Math3D.h"
# include "GPU/GPUState.h"
# include "GPU/ge_constants.h"
2014-03-25 07:21:04 +00:00
# include "GPU/Common/TextureDecoder.h"
2014-08-25 05:16:32 +00:00
# include "GPU/Common/SplineCommon.h"
2014-04-18 12:30:18 +00:00
# include "GPU/Common/TransformCommon.h"
2013-09-15 10:46:14 +00:00
# include "GPU/Directx9/StateMappingDX9.h"
# include "GPU/Directx9/TextureCacheDX9.h"
# include "GPU/Directx9/TransformPipelineDX9.h"
2014-09-10 08:28:44 +00:00
# include "GPU/GLES/VertexDecoder.h"
2013-09-15 10:46:14 +00:00
# include "GPU/Directx9/ShaderManagerDX9.h"
# include "GPU/Directx9/GPU_DX9.h"
2013-08-17 09:23:51 +00:00
2013-09-15 15:53:21 +00:00
namespace DX9 {
2013-08-17 09:23:51 +00:00
// Direct3D 9 primitive type to draw with, looked up as glprim[prim] by the draw code.
// Seven entries are listed explicitly; the eighth slot is left zero-initialized.
const D3DPRIMITIVETYPE glprim[8] = {
	D3DPT_POINTLIST,
	D3DPT_LINELIST,
	D3DPT_LINESTRIP,
	D3DPT_TRIANGLELIST,
	D3DPT_TRIANGLESTRIP,
	D3DPT_TRIANGLEFAN,
	// Last listed entry: drawn as a triangle list. The original note here says sprites have
	// to be expanded into triangles (tripling the data) because quads are not available.
	D3DPT_TRIANGLELIST,
};
2013-09-15 10:46:14 +00:00
// hrydgard's quick guesses - TODO verify
static const int D3DPRIMITIVEVERTEXCOUNT [ 8 ] [ 2 ] = {
{ 0 , 0 } , // invalid
{ 1 , 0 } , // 1 = D3DPT_POINTLIST,
{ 2 , 0 } , // 2 = D3DPT_LINELIST,
{ 2 , 1 } , // 3 = D3DPT_LINESTRIP,
{ 3 , 0 } , // 4 = D3DPT_TRIANGLELIST,
{ 1 , 2 } , // 5 = D3DPT_TRIANGLESTRIP,
{ 1 , 2 } , // 6 = D3DPT_TRIANGLEFAN,
} ;
2013-08-17 09:23:51 +00:00
int D3DPrimCount ( D3DPRIMITIVETYPE prim , int size ) {
return ( size / D3DPRIMITIVEVERTEXCOUNT [ prim ] [ 0 ] ) - D3DPRIMITIVEVERTEXCOUNT [ prim ] [ 1 ] ;
}
enum {
2013-10-27 21:43:58 +00:00
VERTEX_BUFFER_MAX = 65536 ,
DECODED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 48 ,
DECODED_INDEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 20 ,
TRANSFORMED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * sizeof ( TransformedVertex )
2013-08-17 09:23:51 +00:00
} ;
2013-11-15 13:24:25 +00:00
// Number of quads covered by the precomputed quad index table (six indices per quad).
#define QUAD_INDICES_MAX 32768

// Value the constructor loads into decimationCounter_.
#define VERTEXCACHE_DECIMATION_INTERVAL 17
2013-11-15 13:24:25 +00:00
// Restricts `in` to the range [min, max].
// The upper bound is tested first because hitting max is the more common case when lighting;
// as a consequence max wins if the bounds are passed in inverted.
inline float clamp(float in, float min, float max) {
	if (in > max)
		return max;
	if (in < min)
		return min;
	return in;
}
2013-08-17 09:23:51 +00:00
2013-09-15 10:46:14 +00:00
// Zeroes the engine state, allocates the CPU-side scratch buffers, builds the quad index
// table and finally calls InitDeviceObjects().
TransformDrawEngineDX9::TransformDrawEngineDX9()
	: collectedVerts(0),
		prevPrim_(GE_PRIM_INVALID),
		dec_(0),
		lastVType_(-1),
		shaderManager_(0),
		textureCache_(0),
		framebufferManager_(0),
		numDrawCalls(0),
		vertexCountInDrawCalls(0),
		uvScale(0) {
	decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;

	// Start from all-zero decoder options and then enable UV expansion to float.
	memset(&decOptions_, 0, sizeof(decOptions_));
	decOptions_.expandAllUVtoFloat = true;

	// Page-aligned allocations, in the hope that graphics drivers like them.
	// This adds up to a LOT of memory; it would be good to cut it down somehow.
	decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE);
	decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE);
	transformed = (TransformedVertex *)AllocateMemoryPages(TRANSFORMED_VERTEX_BUFFER_SIZE);
	transformedExpanded = (TransformedVertex *)AllocateMemoryPages(3 * TRANSFORMED_VERTEX_BUFFER_SIZE);

	// Two triangles per quad: corners (0, 2, 1) followed by (1, 2, 3), relative to the
	// quad's first vertex.
	quadIndices_ = new u16[6 * QUAD_INDICES_MAX];
	u16 *quad = quadIndices_;
	for (int q = 0; q < QUAD_INDICES_MAX; ++q, quad += 6) {
		const int first = q * 4;
		quad[0] = first;
		quad[1] = first + 2;
		quad[2] = first + 1;
		quad[3] = first + 1;
		quad[4] = first + 2;
		quad[5] = first + 3;
	}

	// The per-draw-call UV scale array is only allocated when UV prescaling is configured.
	if (g_Config.bPrescaleUV)
		uvScale = new UVScale[MAX_DEFERRED_DRAW_CALLS];

	indexGen.Setup(decIndex);
	InitDeviceObjects();
}
2013-09-15 10:46:14 +00:00
// Tears down everything the constructor and the caches created: device objects, the
// scratch buffers, cached vertex declarations, the quad index table, the vertex decoders
// and the UV scale array.
TransformDrawEngineDX9::~TransformDrawEngineDX9() {
	DestroyDeviceObjects();

	// Sizes must match the ones passed to AllocateMemoryPages in the constructor.
	FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
	FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
	FreeMemoryPages(transformed, TRANSFORMED_VERTEX_BUFFER_SIZE);
	FreeMemoryPages(transformedExpanded, 3 * TRANSFORMED_VERTEX_BUFFER_SIZE);

	// Cached declarations may be null (a failed creation is cached too), so check first.
	for (auto &cachedDecl : vertexDeclMap_) {
		if (cachedDecl.second)
			cachedDecl.second->Release();
	}

	delete [] quadIndices_;

	for (auto &cachedDecoder : decoderMap_)
		delete cachedDecoder.second;

	delete [] uvScale;
}
2013-09-15 10:46:14 +00:00
void TransformDrawEngineDX9 : : InitDeviceObjects ( ) {
2013-08-23 15:24:51 +00:00
2013-08-17 09:23:51 +00:00
}
2013-09-15 10:46:14 +00:00
void TransformDrawEngineDX9 : : DestroyDeviceObjects ( ) {
2013-08-17 09:23:51 +00:00
ClearTrackedVertexArrays ( ) ;
}
2013-08-23 15:24:51 +00:00
2013-08-21 09:10:56 +00:00
// One row of the decoded-format -> Direct3D declaration type table (VComp).
struct DeclTypeInfo {
	// D3DDECLTYPE_* value written into the vertex element.
	u32 type;
	// Label printed by LogDecFmtForDraw.
	const char *name;
};
2013-08-23 15:24:51 +00:00
2013-08-21 09:10:56 +00:00
// Maps a decoded vertex component format (the DEC_* value named at the end of each row,
// used as the index) to the Direct3D 9 declaration type and a label for logging.
static const DeclTypeInfo VComp[] = {
	{ 0,                    " NULL " },                  // DEC_NONE,
	{ D3DDECLTYPE_FLOAT1,   " D3DDECLTYPE_FLOAT1 " },    // DEC_FLOAT_1,
	{ D3DDECLTYPE_FLOAT2,   " D3DDECLTYPE_FLOAT2 " },    // DEC_FLOAT_2,
	{ D3DDECLTYPE_FLOAT3,   " D3DDECLTYPE_FLOAT3 " },    // DEC_FLOAT_3,
	{ D3DDECLTYPE_FLOAT4,   " D3DDECLTYPE_FLOAT4 " },    // DEC_FLOAT_4,

	// The labelled type is not supported in regular DX9, so a different one is substituted.
	// This will cause graphics bugs until worked around.
	{ D3DDECLTYPE_UBYTE4,   " D3DDECLTYPE_BYTE4N " },    // DEC_S8_3,

	{ D3DDECLTYPE_SHORT4N,  " D3DDECLTYPE_SHORT4N " },   // DEC_S16_3,
	{ D3DDECLTYPE_UBYTE4N,  " D3DDECLTYPE_UBYTE4N " },   // DEC_U8_1,
	{ D3DDECLTYPE_UBYTE4N,  " D3DDECLTYPE_UBYTE4N " },   // DEC_U8_2,
	{ D3DDECLTYPE_UBYTE4N,  " D3DDECLTYPE_UBYTE4N " },   // DEC_U8_3,
	{ D3DDECLTYPE_UBYTE4N,  " D3DDECLTYPE_UBYTE4N " },   // DEC_U8_4,
	{ D3DDECLTYPE_USHORT2N, " D3DDECLTYPE_USHORT2N " },  // DEC_U16_1,
	{ D3DDECLTYPE_USHORT2N, " D3DDECLTYPE_USHORT2N " },  // DEC_U16_2,
	{ D3DDECLTYPE_USHORT4N, " D3DDECLTYPE_USHORT4N " },  // DEC_U16_3,
	{ D3DDECLTYPE_USHORT4N, " D3DDECLTYPE_USHORT4N " },  // DEC_U16_4,

	// The labelled types are not supported in regular DX9, so others are substituted.
	// This will cause graphics bugs until worked around.
	{ D3DDECLTYPE_UBYTE4,   " D3DDECLTYPE_BYTE4 " },     // DEC_U8A_2,
	{ D3DDECLTYPE_USHORT2N, " D3DDECLTYPE_USHORT4 " },   // DEC_U16A_2,
};
// Fills one D3DVERTEXELEMENT9.
//   VertexElement - element to overwrite; every field not set below ends up zero.
//   fmt           - index into VComp selecting the declaration type.
//   offset        - value stored in the element's Offset field.
//   usage         - value stored in the element's Usage field (D3DDECLUSAGE_* at the call sites).
//   usage_index   - value stored in the element's UsageIndex field.
static void VertexAttribSetup(D3DVERTEXELEMENT9 *VertexElement, u8 fmt, u8 offset, u8 usage, u8 usage_index = 0) {
	memset(VertexElement, 0, sizeof(*VertexElement));

	D3DVERTEXELEMENT9 &element = *VertexElement;
	element.UsageIndex = usage_index;
	element.Usage = usage;
	element.Type = VComp[fmt].type;
	element.Offset = offset;
}
// TODO: Use VBO and get rid of the vertexData pointers - with that, we will supply only offsets
2013-09-01 06:38:40 +00:00
static void LogDecFmtForDraw ( const DecVtxFormat & decFmt ) {
2013-08-21 09:10:56 +00:00
// Vertices Elements orders
// WEIGHT
if ( decFmt . w0fmt ! = 0 ) {
2013-09-01 06:38:40 +00:00
printf ( " decFmt.w0fmt -> %s (%d) \n " , VComp [ decFmt . w0fmt ] . name , decFmt . w0off ) ;
2013-08-21 09:10:56 +00:00
}
if ( decFmt . w1fmt ! = 0 ) {
2013-09-01 06:38:40 +00:00
printf ( " decFmt.w1fmt -> %s (%d) \n " , VComp [ decFmt . w1fmt ] . name , decFmt . w1off ) ;
2013-08-21 09:10:56 +00:00
}
// TC
if ( decFmt . uvfmt ! = 0 ) {
2013-09-01 06:38:40 +00:00
printf ( " decFmt.uvfmt -> %s (%d) \n " , VComp [ decFmt . uvfmt ] . name , decFmt . uvoff ) ;
2013-08-21 09:10:56 +00:00
}
// COLOR
if ( decFmt . c0fmt ! = 0 ) {
2013-09-01 06:38:40 +00:00
printf ( " decFmt.c0fmt -> %s (%d) \n " , VComp [ decFmt . c0fmt ] . name , decFmt . c0off ) ;
2013-08-21 09:10:56 +00:00
}
// NORMAL
if ( decFmt . nrmfmt ! = 0 ) {
2013-09-01 06:38:40 +00:00
printf ( " decFmt.nrmfmt -> %s (%d) \n " , VComp [ decFmt . nrmfmt ] . name , decFmt . nrmoff ) ;
2013-08-21 09:10:56 +00:00
}
// POSITION
// Always
2013-09-01 06:38:40 +00:00
printf ( " decFmt.posfmt -> %s (%d) \n " , VComp [ decFmt . posfmt ] . name , decFmt . posoff ) ;
2013-08-21 09:10:56 +00:00
2013-09-01 06:38:40 +00:00
printf ( " decFmt.stride => %d \n " , decFmt . stride ) ;
//pD3Ddevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_WIREFRAME);
}
2014-08-22 19:27:13 +00:00
2014-09-10 12:07:30 +00:00
// Returns the vertex declaration for a decoded vertex format, creating and caching it on
// first use.
//   vshader - not used by this function.
//   decFmt  - decoded vertex layout to describe to Direct3D.
//   pspFmt  - key under which the declaration is cached in vertexDeclMap_.
// If creation fails the format is logged and the (null) result is still cached and returned.
IDirect3DVertexDeclaration9 *TransformDrawEngineDX9::SetupDecFmtForDraw(VSShader *vshader, const DecVtxFormat &decFmt, u32 pspFmt) {
	auto cached = vertexDeclMap_.find(pspFmt);
	if (cached != vertexDeclMap_.end()) {
		// Already built for this key.
		return cached->second;
	}

	// Up to seven components plus the end marker.
	D3DVERTEXELEMENT9 elements[8];
	D3DVERTEXELEMENT9 *next = elements;

	// Weights travel in TEXCOORD1 / TEXCOORD2.
	if (decFmt.w0fmt != 0)
		VertexAttribSetup(next++, decFmt.w0fmt, decFmt.w0off, D3DDECLUSAGE_TEXCOORD, 1);
	if (decFmt.w1fmt != 0)
		VertexAttribSetup(next++, decFmt.w1fmt, decFmt.w1off, D3DDECLUSAGE_TEXCOORD, 2);

	// Texture coordinate
	if (decFmt.uvfmt != 0)
		VertexAttribSetup(next++, decFmt.uvfmt, decFmt.uvoff, D3DDECLUSAGE_TEXCOORD, 0);

	// Colors (the second one may never be used)
	if (decFmt.c0fmt != 0)
		VertexAttribSetup(next++, decFmt.c0fmt, decFmt.c0off, D3DDECLUSAGE_COLOR, 0);
	if (decFmt.c1fmt != 0)
		VertexAttribSetup(next++, decFmt.c1fmt, decFmt.c1off, D3DDECLUSAGE_COLOR, 1);

	// Normal
	if (decFmt.nrmfmt != 0)
		VertexAttribSetup(next++, decFmt.nrmfmt, decFmt.nrmoff, D3DDECLUSAGE_NORMAL, 0);

	// Position is always present.
	VertexAttribSetup(next++, decFmt.posfmt, decFmt.posoff, D3DDECLUSAGE_POSITION, 0);

	// Terminate the element list.
	D3DVERTEXELEMENT9 terminator = D3DDECL_END();
	memcpy(next, &terminator, sizeof(D3DVERTEXELEMENT9));

	IDirect3DVertexDeclaration9 *created = nullptr;
	HRESULT hr = pD3Ddevice->CreateVertexDeclaration(elements, &created);
	if (FAILED(hr)) {
		// Dump the layout that Direct3D rejected.
		LogDecFmtForDraw(decFmt);
		// DebugBreak();
	}

	vertexDeclMap_[pspFmt] = created;
	return created;
}
2013-08-17 09:23:51 +00:00
// The verts are in the order: BR BL TL TR
// Exchanges the texture coordinates (u, v) of two vertices; all other fields are untouched.
static void SwapUVs(TransformedVertex &a, TransformedVertex &b) {
	const float savedU = b.u;
	const float savedV = b.v;
	b.u = a.u;
	b.v = a.v;
	a.u = savedU;
	a.v = savedV;
}
// 2 3 3 2 0 3 2 1
// to to or
// 1 0 0 1 1 2 3 0
// See comment below where this was called before.
/*
static void RotateUV ( TransformedVertex v [ 4 ] ) {
2013-08-23 15:24:51 +00:00
float x1 = v [ 2 ] . x ;
float x2 = v [ 0 ] . x ;
float y1 = v [ 2 ] . y ;
float y2 = v [ 0 ] . y ;
2013-08-17 09:23:51 +00:00
2013-08-23 15:24:51 +00:00
if ( ( x1 < x2 & & y1 < y2 ) | | ( x1 > x2 & & y1 > y2 ) )
SwapUVs ( v [ 1 ] , v [ 3 ] ) ;
2013-08-17 09:23:51 +00:00
} */
// Given the four corners of an expanded rectangle, swaps the UVs of corners 1 and 3 when
// corner 2 lies left-and-below or right-and-above corner 0 (comparing x and y directly).
static void RotateUVThrough(TransformedVertex v[4]) {
	const float x0 = v[0].x;
	const float y0 = v[0].y;
	const float x2 = v[2].x;
	const float y2 = v[2].y;

	const bool leftAndGreaterY = x2 < x0 && y2 > y0;
	const bool rightAndSmallerY = x2 > x0 && y2 < y0;
	if (leftAndGreaterY || rightAndSmallerY)
		SwapUVs(v[1], v[3]);
}
// Clears on the PSP are best done by drawing a series of vertical strips
// in clear mode. This tries to detect that.
2013-09-15 10:46:14 +00:00
bool TransformDrawEngineDX9 : : IsReallyAClear ( int numVerts ) const {
2013-08-17 09:23:51 +00:00
if ( transformed [ 0 ] . x ! = 0.0f | | transformed [ 0 ] . y ! = 0.0f )
return false ;
u32 matchcolor ;
memcpy ( & matchcolor , transformed [ 0 ] . color0 , 4 ) ;
float matchz = transformed [ 0 ] . z ;
int bufW = gstate_c . curRTWidth ;
int bufH = gstate_c . curRTHeight ;
float prevX = 0.0f ;
for ( int i = 1 ; i < numVerts ; i + + ) {
u32 vcolor ;
memcpy ( & vcolor , transformed [ i ] . color0 , 4 ) ;
if ( vcolor ! = matchcolor | | transformed [ i ] . z ! = matchz )
return false ;
2013-08-23 15:24:51 +00:00
2013-08-17 09:23:51 +00:00
if ( ( i & 1 ) = = 0 ) {
// Top left of a rectangle
if ( transformed [ i ] . y ! = 0 )
return false ;
if ( i > 0 & & transformed [ i ] . x ! = transformed [ i - 1 ] . x )
return false ;
} else {
// Bottom right
if ( transformed [ i ] . y ! = bufH )
return false ;
if ( transformed [ i ] . x < = transformed [ i - 1 ] . x )
return false ;
}
}
// The last vertical strip often extends outside the drawing area.
if ( transformed [ numVerts - 1 ] . x < bufW )
return false ;
return true ;
}
// This is the software transform pipeline, which is necessary for supporting RECT
// primitives correctly, and may be easier to use for debugging than the hardware
// transform pipeline.
// There's code here that simply expands transformed RECTANGLES into plain triangles.
// We're gonna have to keep software transforming RECTANGLES, unless we use a geom shader which we can't on OpenGL ES 2.0.
// Usually, though, these primitives don't use lighting etc so it's no biggie performance wise, but it would be nice to get rid of
// this code.
// Actually, if we find the camera-relative right and down vectors, it might even be possible to add the extra points in pre-transformed
// space and thus make decent use of hardware transform.
// Actually again, single quads could be drawn more efficiently using GL_TRIANGLE_STRIP, no need to duplicate verts as for
// GL_TRIANGLES. Still need to sw transform to compute the extra two corners though.
2013-09-15 10:46:14 +00:00
void TransformDrawEngineDX9 : : SoftwareTransformAndDraw (
2014-09-10 12:07:30 +00:00
int prim , u8 * decoded , int vertexCount , u32 vertType , void * inds , int indexType , const DecVtxFormat & decVtxFormat , int maxIndex ) {
2013-11-14 13:37:58 +00:00
2013-08-23 15:24:51 +00:00
bool throughmode = ( vertType & GE_VTYPE_THROUGH_MASK ) ! = 0 ;
bool lmode = gstate . isUsingSecondaryColor ( ) & & gstate . isLightingEnabled ( ) ;
2013-08-17 09:23:51 +00:00
2013-08-23 15:24:51 +00:00
// TODO: Split up into multiple draw calls for GLES 2.0 where you can't guarantee support for more than 0x10000 verts.
float uscale = 1.0f ;
float vscale = 1.0f ;
if ( throughmode ) {
uscale / = gstate_c . curTextureWidth ;
vscale / = gstate_c . curTextureHeight ;
}
2013-08-17 09:23:51 +00:00
2013-08-23 15:24:51 +00:00
int w = gstate . getTextureWidth ( 0 ) ;
int h = gstate . getTextureHeight ( 0 ) ;
float widthFactor = ( float ) w / ( float ) gstate_c . curTextureWidth ;
float heightFactor = ( float ) h / ( float ) gstate_c . curTextureHeight ;
2013-08-17 09:23:51 +00:00
2013-12-29 20:57:30 +00:00
Lighter lighter ( vertType ) ;
2013-08-23 15:24:51 +00:00
float fog_end = getFloat24 ( gstate . fog1 ) ;
float fog_slope = getFloat24 ( gstate . fog2 ) ;
2013-08-17 09:23:51 +00:00
2013-08-23 15:24:51 +00:00
VertexReader reader ( decoded , decVtxFormat , vertType ) ;
for ( int index = 0 ; index < maxIndex ; index + + ) {
reader . Goto ( index ) ;
2013-08-17 09:23:51 +00:00
2013-08-23 15:24:51 +00:00
float v [ 3 ] = { 0 , 0 , 0 } ;
float c0 [ 4 ] = { 1 , 1 , 1 , 1 } ;
float c1 [ 4 ] = { 0 , 0 , 0 , 0 } ;
2013-11-15 13:24:25 +00:00
float uv [ 3 ] = { 0 , 0 , 1 } ;
2013-08-23 15:24:51 +00:00
float fogCoef = 1.0f ;
2013-08-17 09:23:51 +00:00
2013-08-23 15:24:51 +00:00
if ( throughmode ) {
// Do not touch the coordinates or the colors. No lighting.
reader . ReadPos ( v ) ;
if ( reader . hasColor0 ( ) ) {
reader . ReadColor0 ( c0 ) ;
for ( int j = 0 ; j < 4 ; j + + ) {
c1 [ j ] = 0.0f ;
}
} else {
2014-08-25 06:34:48 +00:00
c0 [ 0 ] = gstate . getMaterialAmbientR ( ) / 255.f ;
c0 [ 1 ] = gstate . getMaterialAmbientG ( ) / 255.f ;
c0 [ 2 ] = gstate . getMaterialAmbientB ( ) / 255.f ;
c0 [ 3 ] = gstate . getMaterialAmbientA ( ) / 255.f ;
2013-08-17 09:23:51 +00:00
}
2013-08-23 15:24:51 +00:00
if ( reader . hasUV ( ) ) {
reader . ReadUV ( uv ) ;
2013-08-17 09:23:51 +00:00
2013-08-23 15:24:51 +00:00
uv [ 0 ] * = uscale ;
uv [ 1 ] * = vscale ;
2013-08-17 09:23:51 +00:00
}
2013-08-23 15:24:51 +00:00
fogCoef = 1.0f ;
// Scale UV?
2013-08-17 09:23:51 +00:00
} else {
2013-08-23 15:24:51 +00:00
// We do software T&L for now
2014-08-25 14:43:19 +00:00
float out [ 3 ] ;
float pos [ 3 ] ;
2013-08-23 15:24:51 +00:00
Vec3f normal ( 0 , 0 , 1 ) ;
2014-08-25 14:43:19 +00:00
Vec3f worldnormal ( 0 , 0 , 1 ) ;
2013-08-23 15:24:51 +00:00
reader . ReadPos ( pos ) ;
2013-11-15 13:24:25 +00:00
if ( ! vertTypeIsSkinningEnabled ( vertType ) ) {
2013-08-23 15:24:51 +00:00
Vec3ByMatrix43 ( out , pos , gstate . worldMatrix ) ;
if ( reader . hasNormal ( ) ) {
2014-08-25 14:43:19 +00:00
reader . ReadNrm ( normal . AsArray ( ) ) ;
if ( gstate . areNormalsReversed ( ) ) {
normal = - normal ;
}
Norm3ByMatrix43 ( worldnormal . AsArray ( ) , normal . AsArray ( ) , gstate . worldMatrix ) ;
worldnormal = worldnormal . Normalized ( ) ;
2013-08-23 15:24:51 +00:00
}
} else {
float weights [ 8 ] ;
reader . ReadWeights ( weights ) ;
2014-08-25 14:43:19 +00:00
if ( reader . hasNormal ( ) )
reader . ReadNrm ( normal . AsArray ( ) ) ;
2013-08-23 15:24:51 +00:00
// Skinning
Vec3f psum ( 0 , 0 , 0 ) ;
Vec3f nsum ( 0 , 0 , 0 ) ;
2013-11-15 13:24:25 +00:00
for ( int i = 0 ; i < vertTypeGetNumBoneWeights ( vertType ) ; i + + ) {
2013-08-23 15:24:51 +00:00
if ( weights [ i ] ! = 0.0f ) {
Vec3ByMatrix43 ( out , pos , gstate . boneMatrix + i * 12 ) ;
Vec3f tpos ( out ) ;
psum + = tpos * weights [ i ] ;
if ( reader . hasNormal ( ) ) {
2014-08-25 14:43:19 +00:00
Vec3f norm ;
Norm3ByMatrix43 ( norm . AsArray ( ) , normal . AsArray ( ) , gstate . boneMatrix + i * 12 ) ;
nsum + = norm * weights [ i ] ;
2013-08-23 15:24:51 +00:00
}
2013-08-17 09:23:51 +00:00
}
}
2013-08-23 15:24:51 +00:00
// Yes, we really must multiply by the world matrix too.
Vec3ByMatrix43 ( out , psum . AsArray ( ) , gstate . worldMatrix ) ;
if ( reader . hasNormal ( ) ) {
2014-08-25 14:43:19 +00:00
normal = nsum ;
if ( gstate . areNormalsReversed ( ) ) {
normal = - normal ;
}
Norm3ByMatrix43 ( worldnormal . AsArray ( ) , normal . AsArray ( ) , gstate . worldMatrix ) ;
worldnormal = worldnormal . Normalized ( ) ;
2013-08-23 15:24:51 +00:00
}
2013-08-17 09:23:51 +00:00
}
2013-08-23 15:24:51 +00:00
// Perform lighting here if enabled. don't need to check through, it's checked above.
float unlitColor [ 4 ] = { 1 , 1 , 1 , 1 } ;
if ( reader . hasColor0 ( ) ) {
reader . ReadColor0 ( unlitColor ) ;
2013-08-17 09:23:51 +00:00
} else {
2014-08-25 06:34:48 +00:00
unlitColor [ 0 ] = gstate . getMaterialAmbientR ( ) / 255.f ;
unlitColor [ 1 ] = gstate . getMaterialAmbientG ( ) / 255.f ;
unlitColor [ 2 ] = gstate . getMaterialAmbientB ( ) / 255.f ;
unlitColor [ 3 ] = gstate . getMaterialAmbientA ( ) / 255.f ;
2013-08-17 09:23:51 +00:00
}
2013-08-23 15:24:51 +00:00
if ( gstate . isLightingEnabled ( ) ) {
2014-08-25 14:43:19 +00:00
float litColor0 [ 4 ] ;
float litColor1 [ 4 ] ;
lighter . Light ( litColor0 , litColor1 , unlitColor , out , worldnormal ) ;
2013-08-23 15:24:51 +00:00
// Don't ignore gstate.lmode - we should send two colors in that case
2013-08-17 09:23:51 +00:00
for ( int j = 0 ; j < 4 ; j + + ) {
2013-08-23 15:24:51 +00:00
c0 [ j ] = litColor0 [ j ] ;
}
if ( lmode ) {
// Separate colors
for ( int j = 0 ; j < 4 ; j + + ) {
c1 [ j ] = litColor1 [ j ] ;
}
} else {
// Summed color into c0
for ( int j = 0 ; j < 4 ; j + + ) {
c0 [ j ] = ( ( c0 [ j ] + litColor1 [ j ] ) > 1.0f ) ? 1.0f : ( c0 [ j ] + litColor1 [ j ] ) ;
}
2013-08-17 09:23:51 +00:00
}
} else {
2013-08-23 15:24:51 +00:00
if ( reader . hasColor0 ( ) ) {
for ( int j = 0 ; j < 4 ; j + + ) {
c0 [ j ] = unlitColor [ j ] ;
}
} else {
2014-08-25 06:34:48 +00:00
c0 [ 0 ] = gstate . getMaterialAmbientR ( ) / 255.f ;
c0 [ 1 ] = gstate . getMaterialAmbientG ( ) / 255.f ;
c0 [ 2 ] = gstate . getMaterialAmbientB ( ) / 255.f ;
c0 [ 3 ] = gstate . getMaterialAmbientA ( ) / 255.f ;
2013-08-23 15:24:51 +00:00
}
if ( lmode ) {
for ( int j = 0 ; j < 4 ; j + + ) {
c1 [ j ] = 0.0f ;
}
2013-08-17 09:23:51 +00:00
}
}
2013-08-23 15:24:51 +00:00
float ruv [ 2 ] = { 0.0f , 0.0f } ;
if ( reader . hasUV ( ) )
reader . ReadUV ( ruv ) ;
2013-08-17 09:23:51 +00:00
2013-08-23 15:24:51 +00:00
// Perform texture coordinate generation after the transform and lighting - one style of UV depends on lights.
2013-09-10 20:35:38 +00:00
switch ( gstate . getUVGenMode ( ) ) {
2013-09-04 09:19:36 +00:00
case GE_TEXMAP_TEXTURE_COORDS : // UV mapping
case GE_TEXMAP_UNKNOWN : // Seen in Riviera. Unsure of meaning, but this works.
2013-08-23 15:24:51 +00:00
// Texture scale/offset is only performed in this mode.
uv [ 0 ] = uscale * ( ruv [ 0 ] * gstate_c . uv . uScale + gstate_c . uv . uOff ) ;
uv [ 1 ] = vscale * ( ruv [ 1 ] * gstate_c . uv . vScale + gstate_c . uv . vOff ) ;
uv [ 2 ] = 1.0f ;
break ;
2013-09-10 20:35:38 +00:00
2013-09-04 09:19:36 +00:00
case GE_TEXMAP_TEXTURE_MATRIX :
2013-08-17 09:23:51 +00:00
{
2013-08-23 15:24:51 +00:00
// Projection mapping
Vec3f source ;
2013-09-10 20:35:38 +00:00
switch ( gstate . getUVProjMode ( ) ) {
2013-09-04 09:19:36 +00:00
case GE_PROJMAP_POSITION : // Use model space XYZ as source
2013-08-23 15:24:51 +00:00
source = pos ;
break ;
2013-09-10 20:35:38 +00:00
2013-09-04 09:19:36 +00:00
case GE_PROJMAP_UV : // Use unscaled UV as source
2013-08-23 15:24:51 +00:00
source = Vec3f ( ruv [ 0 ] , ruv [ 1 ] , 0.0f ) ;
break ;
2013-09-10 20:35:38 +00:00
2013-09-04 09:19:36 +00:00
case GE_PROJMAP_NORMALIZED_NORMAL : // Use normalized normal as source
2014-08-25 14:43:19 +00:00
source = normal . Normalized ( ) ;
if ( ! reader . hasNormal ( ) ) {
2013-08-23 15:24:51 +00:00
ERROR_LOG_REPORT ( G3D , " Normal projection mapping without normal? " ) ;
}
break ;
2013-09-10 20:35:38 +00:00
2013-09-04 09:19:36 +00:00
case GE_PROJMAP_NORMAL : // Use non-normalized normal as source!
2014-08-25 14:43:19 +00:00
source = normal ;
if ( ! reader . hasNormal ( ) ) {
2013-08-23 15:24:51 +00:00
ERROR_LOG_REPORT ( G3D , " Normal projection mapping without normal? " ) ;
}
break ;
2013-08-17 09:23:51 +00:00
}
2013-08-23 15:24:51 +00:00
float uvw [ 3 ] ;
Vec3ByMatrix43 ( uvw , & source . x , gstate . tgenMatrix ) ;
uv [ 0 ] = uvw [ 0 ] ;
uv [ 1 ] = uvw [ 1 ] ;
uv [ 2 ] = uvw [ 2 ] ;
2013-08-17 09:23:51 +00:00
}
2013-08-23 15:24:51 +00:00
break ;
2013-09-10 20:35:38 +00:00
2013-09-04 09:19:36 +00:00
case GE_TEXMAP_ENVIRONMENT_MAP :
2013-08-23 15:24:51 +00:00
// Shade mapping - use two light sources to generate U and V.
{
2014-08-25 14:43:19 +00:00
Vec3f lightpos0 = Vec3f ( & lighter . lpos [ gstate . getUVLS0 ( ) * 3 ] ) . Normalized ( ) ;
Vec3f lightpos1 = Vec3f ( & lighter . lpos [ gstate . getUVLS1 ( ) * 3 ] ) . Normalized ( ) ;
2013-08-17 09:23:51 +00:00
2014-08-25 14:43:19 +00:00
uv [ 0 ] = ( 1.0f + Dot ( lightpos0 , worldnormal ) ) / 2.0f ;
uv [ 1 ] = ( 1.0f - Dot ( lightpos1 , worldnormal ) ) / 2.0f ;
2013-08-23 15:24:51 +00:00
uv [ 2 ] = 1.0f ;
}
break ;
2013-09-10 20:35:38 +00:00
2013-08-23 15:24:51 +00:00
default :
// Illegal
2013-09-10 20:35:38 +00:00
ERROR_LOG_REPORT ( G3D , " Impossible UV gen mode? %d " , gstate . getUVGenMode ( ) ) ;
2013-08-23 15:24:51 +00:00
break ;
2013-08-17 09:23:51 +00:00
}
2013-11-15 13:24:25 +00:00
2013-08-23 15:24:51 +00:00
uv [ 0 ] = uv [ 0 ] * widthFactor ;
uv [ 1 ] = uv [ 1 ] * heightFactor ;
2013-08-17 09:23:51 +00:00
2013-08-23 15:24:51 +00:00
// Transform the coord by the view matrix.
Vec3ByMatrix43 ( v , out , gstate . viewMatrix ) ;
fogCoef = ( v [ 2 ] + fog_end ) * fog_slope ;
2013-08-17 09:23:51 +00:00
}
2013-08-23 15:24:51 +00:00
// TODO: Write to a flexible buffer, we don't always need all four components.
memcpy ( & transformed [ index ] . x , v , 3 * sizeof ( float ) ) ;
transformed [ index ] . fog = fogCoef ;
memcpy ( & transformed [ index ] . u , uv , 3 * sizeof ( float ) ) ;
if ( gstate_c . flipTexture ) {
2013-08-17 09:23:51 +00:00
transformed [ index ] . v = 1.0f - transformed [ index ] . v ;
2013-08-23 15:24:51 +00:00
}
for ( int i = 0 ; i < 4 ; i + + ) {
transformed [ index ] . color0 [ i ] = c0 [ i ] * 255.0f ;
}
for ( int i = 0 ; i < 3 ; i + + ) {
transformed [ index ] . color1 [ i ] = c1 [ i ] * 255.0f ;
}
2013-08-17 09:23:51 +00:00
}
2013-08-23 15:24:51 +00:00
// Step 2: expand rectangles.
const TransformedVertex * drawBuffer = transformed ;
int numTrans = 0 ;
2013-08-17 09:23:51 +00:00
2013-08-23 15:24:51 +00:00
bool drawIndexed = false ;
2013-08-17 09:23:51 +00:00
2013-08-23 15:24:51 +00:00
if ( prim ! = GE_PRIM_RECTANGLES ) {
// We can simply draw the unexpanded buffer.
numTrans = vertexCount ;
drawIndexed = true ;
} else {
numTrans = 0 ;
drawBuffer = transformedExpanded ;
TransformedVertex * trans = & transformedExpanded [ 0 ] ;
TransformedVertex saved ;
2013-11-15 13:24:25 +00:00
u32 stencilValue ;
2013-08-23 15:24:51 +00:00
for ( int i = 0 ; i < vertexCount ; i + = 2 ) {
int index = ( ( const u16 * ) inds ) [ i ] ;
saved = transformed [ index ] ;
int index2 = ( ( const u16 * ) inds ) [ i + 1 ] ;
TransformedVertex & transVtx = transformed [ index2 ] ;
2013-11-15 13:24:25 +00:00
if ( i = = 0 )
stencilValue = transVtx . color0 [ 3 ] ;
2013-08-23 15:24:51 +00:00
// We have to turn the rectangle into two triangles, so 6 points. Sigh.
// bottom right
trans [ 0 ] = transVtx ;
// bottom left
trans [ 1 ] = transVtx ;
trans [ 1 ] . y = saved . y ;
trans [ 1 ] . v = saved . v ;
// top left
trans [ 2 ] = transVtx ;
trans [ 2 ] . x = saved . x ;
trans [ 2 ] . y = saved . y ;
trans [ 2 ] . u = saved . u ;
trans [ 2 ] . v = saved . v ;
// top right
trans [ 3 ] = transVtx ;
trans [ 3 ] . x = saved . x ;
trans [ 3 ] . u = saved . u ;
// That's the four corners. Now process UV rotation.
if ( throughmode )
RotateUVThrough ( trans ) ;
// Apparently, non-through RotateUV just breaks things.
// If we find a game where it helps, we'll just have to figure out how they differ.
// Possibly, it has something to do with flipped viewport Y axis, which a few games use.
2013-11-15 13:24:25 +00:00
// One game might be one of the Metal Gear ones, can't find the issue right now though.
2013-08-23 15:24:51 +00:00
// else
// RotateUV(trans);
// bottom right
trans [ 4 ] = trans [ 0 ] ;
// top left
trans [ 5 ] = trans [ 2 ] ;
trans + = 6 ;
numTrans + = 6 ;
}
2013-11-15 13:24:25 +00:00
// We don't know the color until here, so we have to do it now, instead of in StateMapping.
// Might want to reconsider the order of things later...
if ( gstate . isModeClear ( ) & & gstate . isClearModeAlphaMask ( ) ) {
dxstate . stencilFunc . set ( D3DCMP_ALWAYS , stencilValue , 255 ) ;
}
2013-08-17 09:23:51 +00:00
}
2013-08-20 16:47:11 +00:00
2013-08-23 15:24:51 +00:00
// TODO: Add a post-transform cache here for multi-RECTANGLES only.
// Might help for text drawing.
2013-08-20 16:47:11 +00:00
2013-08-23 15:24:51 +00:00
// these spam the gDebugger log.
const int vertexSize = sizeof ( transformed [ 0 ] ) ;
2013-08-20 16:47:11 +00:00
2013-08-23 15:24:51 +00:00
pD3Ddevice - > SetVertexDeclaration ( pSoftVertexDecl ) ;
2013-08-20 16:47:11 +00:00
2013-08-23 15:24:51 +00:00
/// Debug !!
//pD3Ddevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_WIREFRAME);
2013-12-03 15:53:30 +00:00
if ( drawIndexed ) {
pD3Ddevice - > DrawIndexedPrimitiveUP ( glprim [ prim ] , 0 , vertexCount , D3DPrimCount ( glprim [ prim ] , numTrans ) , inds , D3DFMT_INDEX16 , drawBuffer , sizeof ( TransformedVertex ) ) ;
} else {
pD3Ddevice - > DrawPrimitiveUP ( glprim [ prim ] , D3DPrimCount ( glprim [ prim ] , numTrans ) , drawBuffer , sizeof ( TransformedVertex ) ) ;
}
2013-08-20 16:47:11 +00:00
}
2014-09-10 08:28:44 +00:00
// Returns the vertex decoder registered for vtype in decoderMap_. When none
// exists yet, a new decoder is created, configured with decOptions_, stored in
// the map and returned. The map keeps the raw pointer.
VertexDecoder *TransformDrawEngineDX9::GetVertexDecoder(u32 vtype) {
	auto cached = decoderMap_.find(vtype);
	if (cached == decoderMap_.end()) {
		// First time this vertex type is seen: build and remember a decoder.
		VertexDecoder *created = new VertexDecoder();
		created->SetVertexType(vtype, decOptions_);
		decoderMap_[vtype] = created;
		return created;
	}
	return cached->second;
}
2013-09-15 10:46:14 +00:00
void TransformDrawEngineDX9 : : SetupVertexDecoder ( u32 vertType ) {
2013-08-17 09:23:51 +00:00
// If vtype has changed, setup the vertex decoder.
// TODO: Simply cache the setup decoders instead.
if ( vertType ! = lastVType_ ) {
dec_ = GetVertexDecoder ( vertType ) ;
lastVType_ = vertType ;
}
}
2013-09-15 10:46:14 +00:00
int TransformDrawEngineDX9 : : EstimatePerVertexCost ( ) {
2013-08-17 09:23:51 +00:00
// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
// runs in parallel with transform.
// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.
// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
// went too fast and starts doing all the work over again).
int cost = 20 ;
if ( gstate . isLightingEnabled ( ) ) {
cost + = 10 ;
}
for ( int i = 0 ; i < 4 ; i + + ) {
if ( gstate . isLightChanEnabled ( i ) )
cost + = 10 ;
}
2013-09-04 09:19:36 +00:00
if ( gstate . getUVGenMode ( ) ! = GE_TEXMAP_TEXTURE_COORDS ) {
2013-08-17 09:23:51 +00:00
cost + = 20 ;
}
if ( dec_ & & dec_ - > morphcount > 1 ) {
cost + = 5 * dec_ - > morphcount ;
}
return cost ;
}
2013-09-15 10:46:14 +00:00
void TransformDrawEngineDX9 : : SubmitPrim ( void * verts , void * inds , GEPrimitiveType prim , int vertexCount , u32 vertType , int forceIndexType , int * bytesRead ) {
2013-08-17 09:23:51 +00:00
if ( vertexCount = = 0 )
return ; // we ignore zero-sized draw calls.
2013-10-27 21:43:58 +00:00
if ( ! indexGen . PrimCompatible ( prevPrim_ , prim ) | | numDrawCalls > = MAX_DEFERRED_DRAW_CALLS | | vertexCountInDrawCalls + vertexCount > VERTEX_BUFFER_MAX )
2013-08-17 09:23:51 +00:00
Flush ( ) ;
2013-09-10 20:35:38 +00:00
// TODO: Is this the right thing to do?
if ( prim = = GE_PRIM_KEEP_PREVIOUS ) {
prim = prevPrim_ ;
}
2013-08-17 09:23:51 +00:00
prevPrim_ = prim ;
2013-09-10 20:35:38 +00:00
2013-08-17 09:23:51 +00:00
SetupVertexDecoder ( vertType ) ;
dec_ - > IncrementStat ( STAT_VERTSSUBMITTED , vertexCount ) ;
if ( bytesRead )
* bytesRead = vertexCount * dec_ - > VertexSize ( ) ;
gpuStats . numDrawCalls + + ;
gpuStats . numVertsSubmitted + = vertexCount ;
DeferredDrawCall & dc = drawCalls [ numDrawCalls ] ;
dc . verts = verts ;
dc . inds = inds ;
dc . vertType = vertType ;
dc . indexType = ( ( forceIndexType = = - 1 ) ? ( vertType & GE_VTYPE_IDX_MASK ) : forceIndexType ) > > GE_VTYPE_IDX_SHIFT ;
dc . prim = prim ;
dc . vertexCount = vertexCount ;
if ( inds ) {
GetIndexBounds ( inds , vertexCount , vertType , & dc . indexLowerBound , & dc . indexUpperBound ) ;
} else {
dc . indexLowerBound = 0 ;
dc . indexUpperBound = vertexCount - 1 ;
}
if ( uvScale ) {
uvScale [ numDrawCalls ] = gstate_c . uv ;
}
numDrawCalls + + ;
2013-10-27 21:43:58 +00:00
vertexCountInDrawCalls + = vertexCount ;
2013-08-17 09:23:51 +00:00
}
2013-09-15 10:46:14 +00:00
void TransformDrawEngineDX9 : : DecodeVerts ( ) {
2013-11-15 13:24:25 +00:00
UVScale origUV ;
if ( uvScale )
origUV = gstate_c . uv ;
2013-08-17 09:23:51 +00:00
for ( int i = 0 ; i < numDrawCalls ; i + + ) {
const DeferredDrawCall & dc = drawCalls [ i ] ;
indexGen . SetIndex ( collectedVerts ) ;
int indexLowerBound = dc . indexLowerBound , indexUpperBound = dc . indexUpperBound ;
u32 indexType = dc . indexType ;
void * inds = dc . inds ;
if ( indexType = = GE_VTYPE_IDX_NONE > > GE_VTYPE_IDX_SHIFT ) {
// Decode the verts and apply morphing. Simple.
if ( uvScale )
gstate_c . uv = uvScale [ i ] ;
dec_ - > DecodeVerts ( decoded + collectedVerts * ( int ) dec_ - > GetDecVtxFmt ( ) . stride ,
dc . verts , indexLowerBound , indexUpperBound ) ;
collectedVerts + = indexUpperBound - indexLowerBound + 1 ;
indexGen . AddPrim ( dc . prim , dc . vertexCount ) ;
} else {
// It's fairly common that games issue long sequences of PRIM calls, with differing
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
// these as much as possible, so we make sure here to combine as many as possible
// into one nice big drawcall, sharing data.
// 1. Look ahead to find the max index, only looking as "matching" drawcalls.
// Expand the lower and upper bounds as we go.
int j = i + 1 ;
int lastMatch = i ;
while ( j < numDrawCalls ) {
if ( drawCalls [ j ] . verts ! = dc . verts )
break ;
2013-11-15 13:24:25 +00:00
if ( uvScale & & memcmp ( & uvScale [ j ] , & uvScale [ i ] , sizeof ( uvScale [ 0 ] ) ) ! = 0 )
2013-08-17 09:23:51 +00:00
break ;
indexLowerBound = std : : min ( indexLowerBound , ( int ) drawCalls [ j ] . indexLowerBound ) ;
indexUpperBound = std : : max ( indexUpperBound , ( int ) drawCalls [ j ] . indexUpperBound ) ;
lastMatch = j ;
j + + ;
}
2013-08-23 15:24:51 +00:00
2013-08-17 09:23:51 +00:00
// 2. Loop through the drawcalls, translating indices as we go.
for ( j = i ; j < = lastMatch ; j + + ) {
switch ( indexType ) {
case GE_VTYPE_IDX_8BIT > > GE_VTYPE_IDX_SHIFT :
indexGen . TranslatePrim ( drawCalls [ j ] . prim , drawCalls [ j ] . vertexCount , ( const u8 * ) drawCalls [ j ] . inds , indexLowerBound ) ;
break ;
case GE_VTYPE_IDX_16BIT > > GE_VTYPE_IDX_SHIFT :
indexGen . TranslatePrim ( drawCalls [ j ] . prim , drawCalls [ j ] . vertexCount , ( const u16 * ) drawCalls [ j ] . inds , indexLowerBound ) ;
break ;
}
}
int vertexCount = indexUpperBound - indexLowerBound + 1 ;
// 3. Decode that range of vertex data.
if ( uvScale )
gstate_c . uv = uvScale [ i ] ;
dec_ - > DecodeVerts ( decoded + collectedVerts * ( int ) dec_ - > GetDecVtxFmt ( ) . stride ,
dc . verts , indexLowerBound , indexUpperBound ) ;
collectedVerts + = vertexCount ;
// 4. Advance indexgen vertex counter.
indexGen . Advance ( vertexCount ) ;
i = lastMatch ;
}
}
// Sanity check
if ( indexGen . Prim ( ) < 0 ) {
2013-09-10 20:35:38 +00:00
ERROR_LOG_REPORT ( G3D , " DecodeVerts: Failed to deduce prim: %i " , indexGen . Prim ( ) ) ;
2013-08-17 09:23:51 +00:00
// Force to points (0)
indexGen . AddPrim ( GE_PRIM_POINTS , 0 ) ;
}
2013-11-15 13:24:25 +00:00
if ( uvScale )
gstate_c . uv = origUV ;
2013-08-17 09:23:51 +00:00
}
2013-09-15 10:46:14 +00:00
// Computes a hash over the source data of all queued draw calls, used to
// detect whether cached vertex data is still valid.
//
// Non-indexed calls hash their vertex data directly. For indexed calls,
// consecutive calls sharing the same vertex pointer are merged: the vertex
// range spanned by their combined index bounds is hashed once, together with
// the index data of the first call of the run. The recorded UV scales are
// folded in at the end when tracked.
u32 TransformDrawEngineDX9::ComputeHash() {
	u32 fullhash = 0;
	// NOTE(review): this is the stride of the *decoded* format, but it is used
	// below to size reads of the *source* vertex data (dc.verts). SubmitPrim
	// uses dec_->VertexSize() for source sizes - confirm which is intended.
	int vertexSize = dec_->GetDecVtxFmt().stride;

	// TODO: Add some caps both for numDrawCalls and num verts to check?
	// It is really very expensive to check all the vertex data so often.
	for (int i = 0; i < numDrawCalls; i++) {
		const DeferredDrawCall &dc = drawCalls[i];
		if (!dc.inds) {
			fullhash += DoReliableHash((const char *)dc.verts, vertexSize * dc.vertexCount, 0x1DE8CAC4);
		} else {
			int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound;
			int j = i + 1;
			int lastMatch = i;
			while (j < numDrawCalls) {
				if (drawCalls[j].verts != dc.verts)
					break;

				// Widen the range with the bounds of the call being merged
				// (drawCalls[j]). Using dc here would leave the range at the
				// first call's bounds while still skipping the merged calls.
				indexLowerBound = std::min(indexLowerBound, (int)drawCalls[j].indexLowerBound);
				indexUpperBound = std::max(indexUpperBound, (int)drawCalls[j].indexUpperBound);
				lastMatch = j;
				j++;
			}
			// This could get seriously expensive with sparse indices. Need to combine hashing ranges the same way
			// we do when drawing.
			// NOTE(review): the upper bound is treated as inclusive when decoding
			// (count = upper - lower + 1), so this length excludes the last vertex
			// of the range - confirm whether that is intended.
			fullhash += DoReliableHash((const char *)dc.verts + vertexSize * indexLowerBound,
				vertexSize * (indexUpperBound - indexLowerBound), 0x029F3EE1);
			int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
			// Hm, we will miss some indices when combining above, but meh, it should be fine.
			fullhash += DoReliableHash((const char *)dc.inds, indexSize * dc.vertexCount, 0x955FD1CA);
			// Continue after the last call merged into this run.
			i = lastMatch;
		}
	}
	if (uvScale) {
		fullhash += DoReliableHash(&uvScale[0], sizeof(uvScale[0]) * numDrawCalls, 0x0123e658);
	}

	return fullhash;
}
2013-09-15 10:46:14 +00:00
// Builds a cheap identifier for the queued draw calls by mixing each call's
// vertex pointer, index pointer, vertex type, vertex count and primitive type.
// Only these parameters are mixed in; the vertex contents are not read.
u32 TransformDrawEngineDX9::ComputeFastDCID() {
	u32 id = 0;
	for (int n = 0; n < numDrawCalls; ++n) {
		const DeferredDrawCall &call = drawCalls[n];
		// XOR each field in, rotating by 13 bits between fields.
		id = __rotl(id ^ (u32)(uintptr_t)call.verts, 13);
		id = __rotl(id ^ (u32)(uintptr_t)call.inds, 13);
		id = __rotl(id ^ (u32)call.vertType, 13);
		id = __rotl(id ^ (u32)call.vertexCount, 13);
		// No rotation after the last field of a call.
		id ^= (u32)call.prim;
	}
	return id;
}
// Age limit, in flips (gpuStats.numFlips), for tracked vertex arrays: entries
// whose lastFrame is older than this are deleted by DecimateTrackedVertexArrays.
enum { VAI_KILL_AGE = 120 } ;
2013-09-15 10:46:14 +00:00
void TransformDrawEngineDX9 : : ClearTrackedVertexArrays ( ) {
2013-08-17 09:23:51 +00:00
for ( auto vai = vai_ . begin ( ) ; vai ! = vai_ . end ( ) ; vai + + ) {
delete vai - > second ;
}
vai_ . clear ( ) ;
}
2013-09-15 10:46:14 +00:00
void TransformDrawEngineDX9 : : DecimateTrackedVertexArrays ( ) {
2013-08-20 16:47:11 +00:00
if ( - - decimationCounter_ < = 0 ) {
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL ;
} else {
return ;
}
2013-08-17 13:11:27 +00:00
int threshold = gpuStats . numFlips - VAI_KILL_AGE ;
2013-08-17 09:23:51 +00:00
for ( auto iter = vai_ . begin ( ) ; iter ! = vai_ . end ( ) ; ) {
if ( iter - > second - > lastFrame < threshold ) {
delete iter - > second ;
vai_ . erase ( iter + + ) ;
}
else
+ + iter ;
}
// Enable if you want to see vertex decoders in the log output. Need a better way.
#if 0
char buffer [ 16384 ] ;
for ( std : : map < u32 , VertexDecoder * > : : iterator dec = decoderMap_ . begin ( ) ; dec ! = decoderMap_ . end ( ) ; + + dec ) {
char * ptr = buffer ;
ptr + = dec - > second - > ToString ( ptr ) ;
2013-08-23 15:24:51 +00:00
// *ptr++ = '\n';
2013-09-10 20:35:38 +00:00
NOTICE_LOG ( G3D , buffer ) ;
2013-08-17 09:23:51 +00:00
}
# endif
}
2013-09-15 10:46:14 +00:00
// Releases the vertex buffer and the index buffer held by this entry, if any.
VertexArrayInfoDX9::~VertexArrayInfoDX9() {
	if (vbo != NULL)
		vbo->Release();
	if (ebo != NULL)
		ebo->Release();
}
2013-09-15 10:46:14 +00:00
// Draws everything that has been queued in drawCalls and then resets the
// deferred-draw state (index generator, collected vertex count, draw call
// count, queued vertex count, prevPrim_, gstate_c.vertexFullAlpha).
//
// Two paths, selected by the vertex shader returned from ApplyShader:
//  * Hardware transform: optionally goes through the vertex cache (vai_),
//    keyed by ComputeFastDCID(), and draws either from cached D3D buffers or
//    directly from the decoded arrays.
//  * Software transform: decodes and hands off to SoftwareTransformAndDraw.
//
// NOTE(review): maxIndex is assigned on several paths below but never read in
// this function; the indexed draws pass vertexCount (UP path) or 0 (buffer
// path) where D3D9 expects the number of vertices. Confirm whether maxIndex
// was meant to be used there.
void TransformDrawEngineDX9 : : DoFlush ( ) {
2013-08-17 09:23:51 +00:00
gpuStats . numFlushes + + ;
2013-08-23 15:24:51 +00:00
2013-08-17 09:23:51 +00:00
gpuStats . numTrackedVertexArrays = ( int ) vai_ . size ( ) ;
2013-08-20 16:47:11 +00:00
// This is not done on every drawcall, we should collect vertex data
2013-08-17 09:23:51 +00:00
// until critical state changes. That's when we draw (flush).
2013-09-04 09:19:36 +00:00
GEPrimitiveType prim = prevPrim_ ;
2013-08-17 09:23:51 +00:00
ApplyDrawState ( prim ) ;
2014-09-10 12:07:30 +00:00
VSShader * vshader = shaderManager_ - > ApplyShader ( prim , lastVType_ ) ;
2013-08-21 09:10:56 +00:00
2014-09-10 12:07:30 +00:00
if ( vshader - > UseHWTransform ( ) ) {
2013-08-23 15:24:51 +00:00
// Hardware transform path. vb_/ib_ stay NULL unless a cached buffer is used,
// in which case the draw below reads from them instead of the decoded arrays.
LPDIRECT3DVERTEXBUFFER9 vb_ = NULL ;
LPDIRECT3DINDEXBUFFER9 ib_ = NULL ;
int vertexCount = 0 ;
2013-11-15 13:24:25 +00:00
int maxIndex = 0 ;
2013-08-23 15:24:51 +00:00
bool useElements = true ;
// Cannot cache vertex data with morph enabled.
if ( g_Config . bVertexCache & & ! ( lastVType_ & GE_VTYPE_MORPHCOUNT_MASK ) ) {
// Look up (or create) the cache entry for this set of draw calls.
u32 id = ComputeFastDCID ( ) ;
auto iter = vai_ . find ( id ) ;
2013-09-15 10:46:14 +00:00
VertexArrayInfoDX9 * vai ;
2013-08-23 15:24:51 +00:00
if ( iter ! = vai_ . end ( ) ) {
// We've seen this before. Could have been a cached draw.
vai = iter - > second ;
} else {
2013-09-15 10:46:14 +00:00
vai = new VertexArrayInfoDX9 ( ) ;
2013-08-23 15:24:51 +00:00
vai_ [ id ] = vai ;
}
// Every case that ends in "goto rotateVBO" jumps into the uncached branch
// further down and therefore skips the vai->lastFrame update after this switch.
switch ( vai - > status ) {
2013-09-15 10:46:14 +00:00
case VertexArrayInfoDX9 : : VAI_NEW :
2013-08-23 15:24:51 +00:00
{
// Haven't seen this one before.
// Record the full data hash and basic stats, then draw uncached this time.
u32 dataHash = ComputeHash ( ) ;
vai - > hash = dataHash ;
2013-09-15 10:46:14 +00:00
vai - > status = VertexArrayInfoDX9 : : VAI_HASHING ;
2013-08-23 15:24:51 +00:00
vai - > drawsUntilNextFullHash = 0 ;
DecodeVerts ( ) ; // writes to indexGen
vai - > numVerts = indexGen . VertexCount ( ) ;
vai - > prim = indexGen . Prim ( ) ;
2013-11-15 13:24:25 +00:00
vai - > maxIndex = indexGen . MaxIndex ( ) ;
2014-09-09 08:03:08 +00:00
vai - > flags = gstate_c . vertexFullAlpha ? VAI_FLAG_VERTEXFULLALPHA : 0 ;
2013-08-23 15:24:51 +00:00
goto rotateVBO ;
}
// Hashing - still gaining confidence about the buffer.
// But if we get this far it's likely to be worth creating a vertex buffer.
2013-09-15 10:46:14 +00:00
case VertexArrayInfoDX9 : : VAI_HASHING :
2013-08-23 15:24:51 +00:00
{
vai - > numDraws + + ;
if ( vai - > lastFrame ! = gpuStats . numFlips ) {
vai - > numFrames + + ;
}
if ( vai - > drawsUntilNextFullHash = = 0 ) {
// Time for a full re-hash of the source data.
u32 newHash = ComputeHash ( ) ;
if ( newHash ! = vai - > hash ) {
// Data changed: mark the entry unreliable, drop its buffers and draw uncached.
2013-09-15 10:46:14 +00:00
vai - > status = VertexArrayInfoDX9 : : VAI_UNRELIABLE ;
2013-08-23 15:24:51 +00:00
if ( vai - > vbo ) {
vai - > vbo - > Release ( ) ;
vai - > vbo = NULL ;
}
if ( vai - > ebo ) {
vai - > ebo - > Release ( ) ;
vai - > ebo = NULL ;
}
DecodeVerts ( ) ;
goto rotateVBO ;
}
if ( vai - > numVerts > 100 ) {
// exponential backoff up to 16 draws, then every 24
vai - > drawsUntilNextFullHash = std : : min ( 24 , vai - > numFrames ) ;
} else {
// Lower numbers seem much more likely to change.
vai - > drawsUntilNextFullHash = 0 ;
}
// TODO: tweak
//if (vai->numFrames > 1000) {
// vai->status = VertexArrayInfo::VAI_RELIABLE;
//}
} else {
vai - > drawsUntilNextFullHash - - ;
// TODO: "mini-hashing" the first 32 bytes of the vertex/index data or something.
}
if ( vai - > vbo = = 0 ) {
// No buffer yet: decode now and upload the result into new D3D buffers.
DecodeVerts ( ) ;
vai - > numVerts = indexGen . VertexCount ( ) ;
vai - > prim = indexGen . Prim ( ) ;
2013-11-15 13:24:25 +00:00
vai - > maxIndex = indexGen . MaxIndex ( ) ;
2013-08-23 15:24:51 +00:00
useElements = ! indexGen . SeenOnlyPurePrims ( ) ;
if ( ! useElements & & indexGen . PureCount ( ) ) {
vai - > numVerts = indexGen . PureCount ( ) ;
}
// Always
// The results of CreateVertexBuffer/Lock (and the index buffer equivalents
// below) are not checked before the buffers are written to.
if ( 1 ) {
void * pVb ;
u32 size = dec_ - > GetDecVtxFmt ( ) . stride * indexGen . MaxIndex ( ) ;
pD3Ddevice - > CreateVertexBuffer ( size , NULL , NULL , D3DPOOL_DEFAULT , & vai - > vbo , NULL ) ;
vai - > vbo - > Lock ( 0 , size , & pVb , D3DLOCK_NOOVERWRITE ) ;
memcpy ( pVb , decoded , size ) ;
vai - > vbo - > Unlock ( ) ;
}
// Ib
if ( useElements ) {
void * pIb ;
u32 size = sizeof ( short ) * indexGen . VertexCount ( ) ;
pD3Ddevice - > CreateIndexBuffer ( size , NULL , D3DFMT_INDEX16 , D3DPOOL_DEFAULT , & vai - > ebo , NULL ) ;
vai - > ebo - > Lock ( 0 , size , & pIb , D3DLOCK_NOOVERWRITE ) ;
memcpy ( pIb , decIndex , size ) ;
vai - > ebo - > Unlock ( ) ;
} else {
vai - > ebo = 0 ;
}
} else {
// Buffers already exist: this is a cached draw, no decoding needed.
gpuStats . numCachedDrawCalls + + ;
useElements = vai - > ebo ? true : false ;
gpuStats . numCachedVertsDrawn + = vai - > numVerts ;
2014-09-09 08:03:08 +00:00
gstate_c . vertexFullAlpha = vai - > flags & VAI_FLAG_VERTEXFULLALPHA ;
2013-08-23 15:24:51 +00:00
}
vb_ = vai - > vbo ;
ib_ = vai - > ebo ;
vertexCount = vai - > numVerts ;
2013-11-15 13:24:25 +00:00
maxIndex = vai - > maxIndex ;
prim = static_cast < GEPrimitiveType > ( vai - > prim ) ;
2013-08-23 15:24:51 +00:00
break ;
}
// Reliable - we don't even bother hashing anymore. Right now we don't go here until after a very long time.
2013-09-15 10:46:14 +00:00
case VertexArrayInfoDX9 : : VAI_RELIABLE :
2013-08-23 15:24:51 +00:00
{
vai - > numDraws + + ;
if ( vai - > lastFrame ! = gpuStats . numFlips ) {
vai - > numFrames + + ;
}
gpuStats . numCachedDrawCalls + + ;
gpuStats . numCachedVertsDrawn + = vai - > numVerts ;
vb_ = vai - > vbo ;
ib_ = vai - > ebo ;
vertexCount = vai - > numVerts ;
2013-11-15 13:24:25 +00:00
maxIndex = vai - > maxIndex ;
prim = static_cast < GEPrimitiveType > ( vai - > prim ) ;
2014-09-09 08:03:08 +00:00
gstate_c . vertexFullAlpha = vai - > flags & VAI_FLAG_VERTEXFULLALPHA ;
2013-08-23 15:24:51 +00:00
break ;
}
2013-09-15 10:46:14 +00:00
case VertexArrayInfoDX9 : : VAI_UNRELIABLE :
2013-08-23 15:24:51 +00:00
{
// Known to change: always decode and draw uncached.
vai - > numDraws + + ;
if ( vai - > lastFrame ! = gpuStats . numFlips ) {
vai - > numFrames + + ;
}
DecodeVerts ( ) ;
goto rotateVBO ;
}
}
// Only reached via "break" above (the HASHING and RELIABLE cases).
vai - > lastFrame = gpuStats . numFlips ;
} else {
// Vertex cache disabled or morphing in use: decode and draw directly.
DecodeVerts ( ) ;
// Also the target of the gotos above; the data has already been decoded then.
rotateVBO :
gpuStats . numUncachedVertsDrawn + = indexGen . VertexCount ( ) ;
useElements = ! indexGen . SeenOnlyPurePrims ( ) ;
2013-11-15 13:24:25 +00:00
vertexCount = indexGen . VertexCount ( ) ;
maxIndex = indexGen . MaxIndex ( ) ;
2013-08-23 15:24:51 +00:00
if ( ! useElements & & indexGen . PureCount ( ) ) {
vertexCount = indexGen . PureCount ( ) ;
}
prim = indexGen . Prim ( ) ;
}
DEBUG_LOG ( G3D , " Flush prim %i! %i verts in one go " , prim , vertexCount ) ;
2014-09-09 08:03:08 +00:00
// Refine vertexFullAlpha using the vertex format, material and lighting state.
bool hasColor = ( lastVType_ & GE_VTYPE_COL_MASK ) ! = GE_VTYPE_COL_NONE ;
if ( gstate . isModeThrough ( ) ) {
gstate_c . vertexFullAlpha = gstate_c . vertexFullAlpha & & ( hasColor | | gstate . getMaterialAmbientA ( ) = = 255 ) ;
} else {
gstate_c . vertexFullAlpha = gstate_c . vertexFullAlpha & & ( ( hasColor & & ( gstate . materialupdate & 1 ) ) | | gstate . getMaterialAmbientA ( ) = = 255 ) & & ( ! gstate . isLightingEnabled ( ) | | gstate . getAmbientA ( ) = = 255 ) ;
}
2013-08-23 15:24:51 +00:00
2014-09-10 12:07:30 +00:00
IDirect3DVertexDeclaration9 * pHardwareVertexDecl = SetupDecFmtForDraw ( vshader , dec_ - > GetDecVtxFmt ( ) , dec_ - > VertexType ( ) ) ;
2013-08-23 15:24:51 +00:00
2014-08-22 19:27:13 +00:00
// Nothing is drawn when no vertex declaration could be obtained.
if ( pHardwareVertexDecl ) {
pD3Ddevice - > SetVertexDeclaration ( pHardwareVertexDecl ) ;
if ( vb_ = = NULL ) {
// Uncached: draw straight from the decoded vertex/index arrays.
if ( useElements ) {
pD3Ddevice - > DrawIndexedPrimitiveUP ( glprim [ prim ] , 0 , vertexCount , D3DPrimCount ( glprim [ prim ] , vertexCount ) , decIndex , D3DFMT_INDEX16 , decoded , dec_ - > GetDecVtxFmt ( ) . stride ) ;
} else {
pD3Ddevice - > DrawPrimitiveUP ( glprim [ prim ] , D3DPrimCount ( glprim [ prim ] , vertexCount ) , decoded , dec_ - > GetDecVtxFmt ( ) . stride ) ;
}
2013-08-23 15:24:51 +00:00
} else {
2014-08-22 19:27:13 +00:00
// Cached: draw from the D3D buffers stored on the cache entry.
pD3Ddevice - > SetStreamSource ( 0 , vb_ , 0 , dec_ - > GetDecVtxFmt ( ) . stride ) ;
2013-08-23 15:24:51 +00:00
2014-08-22 19:27:13 +00:00
if ( useElements ) {
pD3Ddevice - > SetIndices ( ib_ ) ;
2013-08-23 15:24:51 +00:00
2014-08-22 19:27:13 +00:00
pD3Ddevice - > DrawIndexedPrimitive ( glprim [ prim ] , 0 , 0 , 0 , 0 , D3DPrimCount ( glprim [ prim ] , vertexCount ) ) ;
} else {
pD3Ddevice - > DrawPrimitive ( glprim [ prim ] , 0 , D3DPrimCount ( glprim [ prim ] , vertexCount ) ) ;
}
2013-08-23 15:24:51 +00:00
}
}
2013-08-21 09:10:56 +00:00
} else {
2013-08-23 15:24:51 +00:00
// Software transform path: always decodes, never uses the vertex cache.
DecodeVerts ( ) ;
2014-09-09 08:03:08 +00:00
bool hasColor = ( lastVType_ & GE_VTYPE_COL_MASK ) ! = GE_VTYPE_COL_NONE ;
if ( gstate . isModeThrough ( ) ) {
gstate_c . vertexFullAlpha = gstate_c . vertexFullAlpha & & ( hasColor | | gstate . getMaterialAmbientA ( ) = = 255 ) ;
} else {
gstate_c . vertexFullAlpha = gstate_c . vertexFullAlpha & & ( ( hasColor & & ( gstate . materialupdate & 1 ) ) | | gstate . getMaterialAmbientA ( ) = = 255 ) & & ( ! gstate . isLightingEnabled ( ) | | gstate . getAmbientA ( ) = = 255 ) ;
}
2013-08-23 15:24:51 +00:00
gpuStats . numUncachedVertsDrawn + = indexGen . VertexCount ( ) ;
prim = indexGen . Prim ( ) ;
// Undo the strip optimization, not supported by the SW code yet.
if ( prim = = GE_PRIM_TRIANGLE_STRIP )
prim = GE_PRIM_TRIANGLES ;
DEBUG_LOG ( G3D , " Flush prim %i SW! %i verts in one go " , prim , indexGen . VertexCount ( ) ) ;
SoftwareTransformAndDraw (
2014-09-10 12:07:30 +00:00
prim , decoded , indexGen . VertexCount ( ) ,
2013-08-23 15:24:51 +00:00
dec_ - > VertexType ( ) , ( void * ) decIndex , GE_VTYPE_IDX_16BIT , dec_ - > GetDecVtxFmt ( ) ,
indexGen . MaxIndex ( ) ) ;
2013-08-21 09:10:56 +00:00
}
2013-08-17 09:23:51 +00:00
2013-08-23 15:24:51 +00:00
// Reset all deferred-draw state so the next SubmitPrim starts a fresh batch.
indexGen . Reset ( ) ;
collectedVerts = 0 ;
numDrawCalls = 0 ;
2013-10-27 21:43:58 +00:00
vertexCountInDrawCalls = 0 ;
2013-09-04 09:19:36 +00:00
prevPrim_ = GE_PRIM_INVALID ;
2014-09-09 08:03:08 +00:00
gstate_c . vertexFullAlpha = true ;
2013-09-22 07:18:46 +00:00
host - > GPUNotifyDraw ( ) ;
2013-08-17 09:23:51 +00:00
}
2014-08-24 12:21:35 +00:00
2013-11-15 13:24:25 +00:00
bool TransformDrawEngineDX9 : : TestBoundingBox ( void * control_points , int vertexCount , u32 vertType ) {
// Simplify away bones and morph before proceeding
/*
SimpleVertex * corners = ( SimpleVertex * ) ( decoded + 65536 * 12 ) ;
u8 * temp_buffer = decoded + 65536 * 24 ;
u32 origVertType = vertType ;
vertType = NormalizeVertices ( ( u8 * ) corners , temp_buffer , ( u8 * ) control_points , 0 , vertexCount , vertType ) ;
for ( int cube = 0 ; cube < vertexCount / 8 ; cube + + ) {
// For each cube...
for ( int i = 0 ; i < 8 ; i + + ) {
const SimpleVertex & vert = corners [ cube * 8 + i ] ;
// To world space...
float worldPos [ 3 ] ;
Vec3ByMatrix43 ( worldPos , ( float * ) & vert . pos . x , gstate . worldMatrix ) ;
// To view space...
float viewPos [ 3 ] ;
Vec3ByMatrix43 ( viewPos , worldPos , gstate . viewMatrix ) ;
// And finally to screen space.
float frustumPos [ 4 ] ;
Vec3ByMatrix44 ( frustumPos , viewPos , gstate . projMatrix ) ;
// Project to 2D
float x = frustumPos [ 0 ] / frustumPos [ 3 ] ;
float y = frustumPos [ 1 ] / frustumPos [ 3 ] ;
// Rescale 2d position
// ...
}
}
*/
// Let's think. A better approach might be to take the edges of the drawing region and the projection
// matrix to build a frustum pyramid, and then clip the cube against those planes. If all vertices fail the same test,
// the cube is out. Otherwise it's in.
// TODO....
return true ;
}
2013-09-15 15:53:21 +00:00
2014-08-25 05:16:32 +00:00
// TODO: Probably move this to common code (with normalization?)
// Applies the viewport registers to clip-space coordinates: each of x/y/z is
// multiplied by its viewport factor, divided by w and then offset. The
// resulting x and y are scaled by 16 (16 = 0xFFFF / 4095.9375); z is returned
// unscaled.
// TODO: Check for invalid parameters (x2 < x1, etc)
static Vec3f ClipToScreen(const Vec4f &coords) {
	const float xFactor = getFloat24(gstate.viewportx1);
	const float xOffset = getFloat24(gstate.viewportx2);
	const float yFactor = getFloat24(gstate.viewporty1);
	const float yOffset = getFloat24(gstate.viewporty2);
	const float zFactor = getFloat24(gstate.viewportz1);
	const float zOffset = getFloat24(gstate.viewportz2);

	const float screenX = coords.x * xFactor / coords.w + xOffset;
	const float screenY = coords.y * yFactor / coords.w + yOffset;
	const float screenZ = coords.z * zFactor / coords.w + zOffset;

	return Vec3f(screenX * 16, screenY * 16, screenZ);
}
// Converts screen coordinates to drawing coordinates: subtracts the 16x-scaled
// GE offset from x and y and divides the result by 16. z is passed through
// unchanged.
static Vec3f ScreenToDrawing(const Vec3f &coords) {
	const float drawX = (coords.x - gstate.getOffsetX16()) * (1.0f / 16.0f);
	const float drawY = (coords.y - gstate.getOffsetY16()) * (1.0f / 16.0f);
	return Vec3f(drawX, drawY, coords.z);
}
// TODO: This probably is not the best interface.
bool TransformDrawEngineDX9 : : GetCurrentSimpleVertices ( int count , std : : vector < GPUDebugVertex > & vertices , std : : vector < u16 > & indices ) {
// This is always for the current vertices.
u16 indexLowerBound = 0 ;
u16 indexUpperBound = count - 1 ;
bool savedVertexFullAlpha = gstate_c . vertexFullAlpha ;
if ( ( gstate . vertType & GE_VTYPE_IDX_MASK ) ! = GE_VTYPE_IDX_NONE ) {
const u8 * inds = Memory : : GetPointer ( gstate_c . indexAddr ) ;
const u16 * inds16 = ( const u16 * ) inds ;
if ( inds ) {
GetIndexBounds ( inds , count , gstate . vertType , & indexLowerBound , & indexUpperBound ) ;
indices . resize ( count ) ;
switch ( gstate . vertType & GE_VTYPE_IDX_MASK ) {
case GE_VTYPE_IDX_16BIT :
for ( int i = 0 ; i < count ; + + i ) {
indices [ i ] = inds16 [ i ] ;
}
break ;
case GE_VTYPE_IDX_8BIT :
for ( int i = 0 ; i < count ; + + i ) {
indices [ i ] = inds [ i ] ;
}
break ;
default :
return false ;
}
} else {
indices . clear ( ) ;
}
} else {
indices . clear ( ) ;
}
static std : : vector < u32 > temp_buffer ;
static std : : vector < SimpleVertex > simpleVertices ;
temp_buffer . resize ( std : : max ( ( int ) indexUpperBound , 8192 ) * 128 / sizeof ( u32 ) ) ;
simpleVertices . resize ( indexUpperBound + 1 ) ;
NormalizeVertices ( ( u8 * ) ( & simpleVertices [ 0 ] ) , ( u8 * ) ( & temp_buffer [ 0 ] ) , Memory : : GetPointer ( gstate_c . vertexAddr ) , indexLowerBound , indexUpperBound , gstate . vertType ) ;
float world [ 16 ] ;
float view [ 16 ] ;
float worldview [ 16 ] ;
float worldviewproj [ 16 ] ;
ConvertMatrix4x3To4x4 ( world , gstate . worldMatrix ) ;
ConvertMatrix4x3To4x4 ( view , gstate . viewMatrix ) ;
Matrix4ByMatrix4 ( worldview , world , view ) ;
Matrix4ByMatrix4 ( worldviewproj , worldview , gstate . projMatrix ) ;
vertices . resize ( indexUpperBound + 1 ) ;
for ( int i = indexLowerBound ; i < = indexUpperBound ; + + i ) {
const SimpleVertex & vert = simpleVertices [ i ] ;
if ( gstate . isModeThrough ( ) ) {
if ( gstate . vertType & GE_VTYPE_TC_MASK ) {
vertices [ i ] . u = vert . uv [ 0 ] ;
vertices [ i ] . v = vert . uv [ 1 ] ;
} else {
vertices [ i ] . u = 0.0f ;
vertices [ i ] . v = 0.0f ;
}
vertices [ i ] . x = vert . pos . x ;
vertices [ i ] . y = vert . pos . y ;
vertices [ i ] . z = vert . pos . z ;
if ( gstate . vertType & GE_VTYPE_COL_MASK ) {
memcpy ( vertices [ i ] . c , vert . color , sizeof ( vertices [ i ] . c ) ) ;
} else {
memset ( vertices [ i ] . c , 0 , sizeof ( vertices [ i ] . c ) ) ;
}
} else {
float clipPos [ 4 ] ;
Vec3ByMatrix44 ( clipPos , vert . pos . AsArray ( ) , worldviewproj ) ;
Vec3f screenPos = ClipToScreen ( clipPos ) ;
Vec3f drawPos = ScreenToDrawing ( screenPos ) ;
if ( gstate . vertType & GE_VTYPE_TC_MASK ) {
vertices [ i ] . u = vert . uv [ 0 ] ;
vertices [ i ] . v = vert . uv [ 1 ] ;
} else {
vertices [ i ] . u = 0.0f ;
vertices [ i ] . v = 0.0f ;
}
vertices [ i ] . x = drawPos . x ;
vertices [ i ] . y = drawPos . y ;
vertices [ i ] . z = drawPos . z ;
if ( gstate . vertType & GE_VTYPE_COL_MASK ) {
memcpy ( vertices [ i ] . c , vert . color , sizeof ( vertices [ i ] . c ) ) ;
} else {
memset ( vertices [ i ] . c , 0 , sizeof ( vertices [ i ] . c ) ) ;
}
}
}
gstate_c . vertexFullAlpha = savedVertexFullAlpha ;
return true ;
}
2013-11-15 13:24:25 +00:00
} // namespace