mirror of
https://github.com/CTCaer/RetroArch.git
synced 2024-12-23 19:24:46 +00:00
(RGL PS3) We don't use shared parameters for vertex and fragment shader
This commit is contained in:
parent
3c17d18904
commit
000b424d75
@ -3034,17 +3034,9 @@ GLAPI void RGL_EXPORT psglSwap (void)
|
||||
}
|
||||
}
|
||||
|
||||
rglGcmDriver *driver = (rglGcmDriver*)_CurrentDevice->rasterDriver;
|
||||
float * __restrict v = (float*)driver->sharedVPConstants;
|
||||
|
||||
thisContext->current[0] = (((33) << (18)) | CELL_GCM_NV4097_SET_TRANSFORM_CONSTANT_LOAD);
|
||||
thisContext->current[1] = 0;
|
||||
|
||||
memcpy(&thisContext->current[2], v, sizeof(float)*16);
|
||||
memcpy(&thisContext->current[18], &v[16], sizeof(float)*16);
|
||||
thisContext->current += 34;
|
||||
v += 32;
|
||||
|
||||
rglGcmSetDitherEnable(thisContext, RGLGCM_TRUE );
|
||||
|
||||
RGLcontext *context = (RGLcontext*)_CurrentContext;
|
||||
|
@ -118,109 +118,17 @@ template<int SIZE> static void setVectorTypefp( void *dat, const void* __restric
|
||||
|
||||
template<int SIZE> static void setVectorTypeSharedfpIndex (void *data, const void* __restrict v, const int /*index*/ )
|
||||
{
|
||||
CellGcmContextData *thisContext = (CellGcmContextData*)gCellGcmCurrentContext;
|
||||
CgRuntimeParameter *ptr = (CgRuntimeParameter*)data;
|
||||
RGLcontext * LContext = _CurrentContext;
|
||||
rglGcmDriver *driver = (rglGcmDriver*)_CurrentDevice->rasterDriver;
|
||||
const CgParameterResource *parameterResource = rglGetParameterResource( ptr->program, ptr->parameterEntry );
|
||||
unsigned short resource = parameterResource->resource;
|
||||
unsigned short sharedResource = *(( unsigned short * )( ptr->program->resources ) + resource );
|
||||
const unsigned int * __restrict vi = ( const unsigned int* )v;
|
||||
|
||||
GLuint dstVidOffset = gmmIdToOffset( driver->sharedFPConstantsId ) + sharedResource * 16;
|
||||
unsigned int values[4];
|
||||
values[0] = SWAP_IF_BIG_ENDIAN( vi[0] );
|
||||
values[1] = ( 1 < SIZE ) ? SWAP_IF_BIG_ENDIAN( vi[1] ) : 0;
|
||||
values[2] = ( 2 < SIZE ) ? SWAP_IF_BIG_ENDIAN( vi[2] ) : 0;
|
||||
values[3] = ( 3 < SIZE ) ? SWAP_IF_BIG_ENDIAN( vi[3] ) : 0;
|
||||
rglGcmInlineTransfer(thisContext, dstVidOffset, values, 4, 0 );
|
||||
|
||||
// XXX we don't care about 32bit wrapping, do we ?
|
||||
++LContext->LastFPConstantModification;
|
||||
}
|
||||
|
||||
template<int SIZE> static void setVectorTypeSharedfpIndexArray (void *data, const void* __restrict v, const int index )
|
||||
{
|
||||
CellGcmContextData *thisContext = (CellGcmContextData*)gCellGcmCurrentContext;
|
||||
CgRuntimeParameter *ptr = (CgRuntimeParameter*)data;
|
||||
RGLcontext * LContext = _CurrentContext;
|
||||
rglGcmDriver *driver = (rglGcmDriver*)_CurrentDevice->rasterDriver;
|
||||
const CgParameterResource *parameterResource = rglGetParameterResource( ptr->program, ptr->parameterEntry );
|
||||
unsigned short resource = parameterResource->resource;
|
||||
|
||||
//slow... skip the indices
|
||||
unsigned short *sharedResourcePtr = (( unsigned short * )( ptr->program->resources ) + resource );//no +1 here, we want the register
|
||||
int arrayIndex = index;
|
||||
while ( arrayIndex ) //jump to the right index... this is slow
|
||||
{
|
||||
sharedResourcePtr += (( *sharedResourcePtr ) + 2 );////+1 for the register, +1 for the count, +count for the number of embedded consts
|
||||
arrayIndex--;
|
||||
}
|
||||
unsigned short sharedResource = *sharedResourcePtr;
|
||||
|
||||
const unsigned int * __restrict vi = ( const unsigned int* )v;
|
||||
|
||||
GLuint dstVidOffset = gmmIdToOffset( driver->sharedFPConstantsId ) + sharedResource * 16;
|
||||
unsigned int values[4];
|
||||
values[0] = SWAP_IF_BIG_ENDIAN( vi[0] );
|
||||
values[1] = ( 1 < SIZE ) ? SWAP_IF_BIG_ENDIAN( vi[1] ) : 0;
|
||||
values[2] = ( 2 < SIZE ) ? SWAP_IF_BIG_ENDIAN( vi[2] ) : 0;
|
||||
values[3] = ( 3 < SIZE ) ? SWAP_IF_BIG_ENDIAN( vi[3] ) : 0;
|
||||
rglGcmInlineTransfer(thisContext, dstVidOffset, values, 4, 0 );
|
||||
|
||||
// XXX we don't care about 32bit wrapping, do we ?
|
||||
++LContext->LastFPConstantModification;
|
||||
}
|
||||
template<int SIZE> static void setVectorTypeSharedvpIndex (void *data, const void* __restrict v, const int /*index*/ )
|
||||
{
|
||||
CellGcmContextData *thisContext = (CellGcmContextData*)gCellGcmCurrentContext;
|
||||
CgRuntimeParameter *ptr = (CgRuntimeParameter*)data;
|
||||
const float * __restrict f = ( const float * __restrict )v;
|
||||
const CgParameterResource *parameterResource = rglGetParameterResource( ptr->program, ptr->parameterEntry );
|
||||
unsigned short resource = parameterResource->resource;
|
||||
float * __restrict dst = ( float * __restrict )ptr->pushBufferPointer;
|
||||
for ( long i = 0; i < SIZE; ++ i )
|
||||
dst[i] = f[i];
|
||||
|
||||
// save to shared memory for context restore after flip
|
||||
rglGcmDriver *driver = (rglGcmDriver*)_CurrentDevice->rasterDriver;
|
||||
|
||||
memcpy(driver->sharedVPConstants + resource * 4 * sizeof( float ),
|
||||
dst, 4 * sizeof(float));
|
||||
|
||||
thisContext->current[0] = (((5) << (18)) | CELL_GCM_NV4097_SET_TRANSFORM_CONSTANT_LOAD);
|
||||
thisContext->current[1] = resource;
|
||||
thisContext->current += 2;
|
||||
|
||||
memcpy(thisContext->current, dst, sizeof(float)*4);
|
||||
thisContext->current += 4;
|
||||
dst += 4;
|
||||
}
|
||||
|
||||
template<int SIZE> static void setVectorTypeSharedvpIndexArray (void *data, const void* __restrict v, const int index )
|
||||
{
|
||||
CellGcmContextData *thisContext = (CellGcmContextData*)gCellGcmCurrentContext;
|
||||
CgRuntimeParameter *ptr = (CgRuntimeParameter*)data;
|
||||
const float * __restrict f = ( const float * __restrict )v;
|
||||
const CgParameterResource *parameterResource = rglGetParameterResource( ptr->program, ptr->parameterEntry );
|
||||
unsigned short resource = parameterResource->resource + index; ///TODO: assume contiguous here , right ?
|
||||
float * __restrict dst = ( float * __restrict )ptr->pushBufferPointer;
|
||||
for ( long i = 0; i < SIZE; ++ i )
|
||||
dst[i] = f[i];
|
||||
|
||||
// save to shared memory for context restore after flip
|
||||
rglGcmDriver *driver = (rglGcmDriver*)_CurrentDevice->rasterDriver;
|
||||
|
||||
memcpy(driver->sharedVPConstants + resource * 4 * sizeof( float ),
|
||||
dst, 4 * sizeof(float));
|
||||
|
||||
thisContext->current[0] = (((5) << (18)) | CELL_GCM_NV4097_SET_TRANSFORM_CONSTANT_LOAD);
|
||||
thisContext->current[1] = resource;
|
||||
thisContext->current += 2;
|
||||
|
||||
memcpy(thisContext->current, dst, sizeof(float)*4);
|
||||
thisContext->current += 4;
|
||||
dst += 4;
|
||||
}
|
||||
|
||||
|
||||
@ -316,90 +224,10 @@ template <int ROWS, int COLS, int ORDER, bool isVpIndexArray> static void setMat
|
||||
|
||||
template <int ROWS, int COLS, int ORDER> static void setMatrixSharedfpIndex (void *data, const void* __restrict v, const int /*index*/ )
|
||||
{
|
||||
CellGcmContextData *thisContext = (CellGcmContextData*)gCellGcmCurrentContext;
|
||||
CgRuntimeParameter *ptr = (CgRuntimeParameter*)data;
|
||||
rglGcmDriver *driver = (rglGcmDriver*)_CurrentDevice->rasterDriver;
|
||||
|
||||
const CgParameterResource *parameterResource = rglGetParameterResource( ptr->program, ptr->parameterEntry );
|
||||
unsigned short resource = parameterResource->resource;
|
||||
unsigned short sharedResource = *(( unsigned short * )( ptr->program->resources ) + resource );
|
||||
|
||||
GLuint dstVidOffset = gmmIdToOffset( driver->sharedFPConstantsId ) + sharedResource * 16;
|
||||
//we assume that the assignment is contiguous
|
||||
const unsigned int * __restrict u = ( const unsigned int* )v;
|
||||
|
||||
unsigned int tmp[ROWS*4];
|
||||
for ( long row = 0; row < ROWS; ++row )
|
||||
{
|
||||
tmp[row*4 + 0] = (( ORDER == ROW_MAJOR ) ? u[row * COLS + 0] : u[0 * ROWS + row] );
|
||||
tmp[row*4 + 1] = (( 1 < COLS ) ? (( ORDER == ROW_MAJOR ) ? u[row * COLS + 1] : u[1 * ROWS + row] ) : 0 );
|
||||
tmp[row*4 + 2] = (( 2 < COLS ) ? (( ORDER == ROW_MAJOR ) ? u[row * COLS + 2] : u[2 * ROWS + row] ) : 0 );
|
||||
tmp[row*4 + 3] = (( 3 < COLS ) ? (( ORDER == ROW_MAJOR ) ? u[row * COLS + 3] : u[3 * ROWS + row] ) : 0 );
|
||||
}
|
||||
rglGcmSetTransferLocation(thisContext, CELL_GCM_LOCATION_LOCAL );
|
||||
void *pointer=NULL;
|
||||
rglGcmSetInlineTransferPointer(thisContext, dstVidOffset, (4*ROWS), pointer);
|
||||
float *fp = (float*)pointer;
|
||||
float *src = (float*)tmp;
|
||||
for (uint32_t j=0; j<ROWS;j++)
|
||||
{
|
||||
fp[0] = cellGcmSwap16Float32(src[0]);
|
||||
fp[1] = cellGcmSwap16Float32(src[1]);
|
||||
fp[2] = cellGcmSwap16Float32(src[2]);
|
||||
fp[3] = cellGcmSwap16Float32(src[3]);
|
||||
fp+=4;src+=4;
|
||||
}
|
||||
|
||||
RGLcontext * LContext = _CurrentContext;
|
||||
++LContext->LastFPConstantModification;
|
||||
}
|
||||
|
||||
template <int ROWS, int COLS, int ORDER> static void setMatrixSharedfpIndexArray (void *data, const void* __restrict v, const int index )
|
||||
{
|
||||
CellGcmContextData *thisContext = (CellGcmContextData*)gCellGcmCurrentContext;
|
||||
CgRuntimeParameter *ptr = (CgRuntimeParameter*)data;
|
||||
//TODO: double check for the semi endian swap... not done here, is it done by the RSX ?
|
||||
rglGcmDriver *driver = (rglGcmDriver*)_CurrentDevice->rasterDriver;
|
||||
|
||||
const CgParameterResource *parameterResource = rglGetParameterResource( ptr->program, ptr->parameterEntry );
|
||||
unsigned short resource = parameterResource->resource;
|
||||
//slow... skip the indices
|
||||
unsigned short *sharedResourcePtr = (( unsigned short * )( ptr->program->resources ) + resource );
|
||||
int arrayIndex = index * ROWS;
|
||||
while ( arrayIndex ) //jump to the right index... this is slow
|
||||
{
|
||||
sharedResourcePtr += (( *sharedResourcePtr ) + 2 );//+1 for the register, +1 for the count, +count for the number of embedded consts
|
||||
arrayIndex--;
|
||||
}
|
||||
unsigned short sharedResource = *sharedResourcePtr;
|
||||
|
||||
GLuint dstVidOffset = gmmIdToOffset( driver->sharedFPConstantsId ) + sharedResource * 16;
|
||||
//we assume that the assignment is contiguous
|
||||
const unsigned int * __restrict u = ( const unsigned int* )v;
|
||||
|
||||
unsigned int tmp[ROWS*4];
|
||||
for ( long row = 0; row < ROWS; ++row )
|
||||
{
|
||||
tmp[row*4 + 0] = (( ORDER == ROW_MAJOR ) ? u[row * COLS + 0] : u[0 * ROWS + row] );
|
||||
tmp[row*4 + 1] = (( 1 < COLS ) ? (( ORDER == ROW_MAJOR ) ? u[row * COLS + 1] : u[1 * ROWS + row] ) : 0 );
|
||||
tmp[row*4 + 2] = (( 2 < COLS ) ? (( ORDER == ROW_MAJOR ) ? u[row * COLS + 2] : u[2 * ROWS + row] ) : 0 );
|
||||
tmp[row*4 + 3] = (( 3 < COLS ) ? (( ORDER == ROW_MAJOR ) ? u[row * COLS + 3] : u[3 * ROWS + row] ) : 0 );
|
||||
}
|
||||
|
||||
rglGcmSetTransferLocation(thisContext, CELL_GCM_LOCATION_LOCAL );
|
||||
|
||||
void *pointer=NULL;
|
||||
rglGcmSetInlineTransferPointer(thisContext, dstVidOffset, (4*ROWS), pointer);
|
||||
float *fp = (float*)pointer;
|
||||
const float *src = (const float*)tmp;
|
||||
for (uint32_t j=0; j<4*ROWS;j++)
|
||||
{
|
||||
*fp = cellGcmSwap16Float32(*src);
|
||||
fp++;src++;
|
||||
}
|
||||
|
||||
RGLcontext * LContext = _CurrentContext;
|
||||
++LContext->LastFPConstantModification;
|
||||
}
|
||||
|
||||
template <int ROWS, int COLS, int ORDER> static void setMatrixvpIndexArray (void *data, const void* __restrict v, const int index )
|
||||
@ -637,12 +465,6 @@ void rglCreatePushBuffer(void *data)
|
||||
samplerCount += arrayCount;
|
||||
}
|
||||
else if ( profileIndex == VERTEX_PROFILE_INDEX )
|
||||
{
|
||||
if ( parameterResource->type == CGP_SCF_BOOL )
|
||||
{
|
||||
//do nothing
|
||||
}
|
||||
else if ( !( parameterEntry->flags & CGPF_SHARED ) )
|
||||
{
|
||||
int registerStride = isMatrix(( CGtype )parameterResource->type ) ? rglGetTypeRowCount(( CGtype )parameterResource->type ) : 1;
|
||||
if ( parameterEntry->flags & CGP_CONTIGUOUS )
|
||||
@ -663,42 +485,17 @@ void rglCreatePushBuffer(void *data)
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
hasSharedParams = true;
|
||||
if ( !( parameterEntry->flags & CGP_CONTIGUOUS ) )
|
||||
programPushBufferPointersSize += arrayCount;
|
||||
}
|
||||
}
|
||||
else //profileIndex == FRAGMENT_PROFILE_INDEX
|
||||
{
|
||||
int registerStride = isMatrix(( CGtype )parameterResource->type ) ? rglGetTypeRowCount(( CGtype )parameterResource->type ) : 1;
|
||||
if ( !( parameterEntry->flags & CGPF_SHARED ) )
|
||||
{
|
||||
//TODO: check this case
|
||||
extraStorageInWords += 4 * arrayCount * registerStride;
|
||||
}
|
||||
else
|
||||
{
|
||||
hasSharedParams = true;
|
||||
unsigned short *resource = program->resources + parameterResource->resource;
|
||||
for ( int j = 0;j < arrayCount*registerStride;j++ )
|
||||
{
|
||||
resource++;
|
||||
unsigned short count = *resource++;
|
||||
bufferSize += 24 * count;
|
||||
resource += count;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
arrayCount = 1;
|
||||
}
|
||||
|
||||
if (( profileIndex == FRAGMENT_PROFILE_INDEX ) && (hasSharedParams))
|
||||
bufferSize += 8 + 3 + 2; // GCM_PORT_TESTED [CEDRIC] +3 for the channel switch that gcm does + 2 for the OUT end
|
||||
|
||||
bufferSize = rglPad( bufferSize, 4 );
|
||||
|
||||
//allocate the buffer(s)
|
||||
@ -820,12 +617,6 @@ void rglCreatePushBuffer(void *data)
|
||||
else
|
||||
{
|
||||
if ( profileIndex == VERTEX_PROFILE_INDEX )
|
||||
{
|
||||
if ( parameterResource->type == CGP_SCF_BOOL )
|
||||
{
|
||||
//do nothing
|
||||
}
|
||||
else if ( !( parameterEntry->flags & CGPF_SHARED ) )
|
||||
{
|
||||
int registerStride = isMatrix(( CGtype )parameterResource->type ) ? rglGetTypeRowCount(( CGtype )parameterResource->type ) : 1;
|
||||
int registerCount = arrayCount * registerStride;
|
||||
@ -866,39 +657,13 @@ void rglCreatePushBuffer(void *data)
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
rglGcmDriver *driver = (rglGcmDriver*)_CurrentDevice->rasterDriver;
|
||||
|
||||
if ( parameterEntry->flags & CGP_CONTIGUOUS )
|
||||
rtParameter->pushBufferPointer = driver->sharedVPConstants + parameterResource->resource * 4 * sizeof( float );
|
||||
else
|
||||
{
|
||||
int registerStride = isMatrix(( CGtype )parameterResource->type ) ? rglGetTypeRowCount(( CGtype )parameterResource->type ) : 1;
|
||||
int registerCount = arrayCount * registerStride;
|
||||
for ( int j = 0;j < registerCount;j += registerStride )
|
||||
{
|
||||
*programPushBuffer = ( unsigned int* )driver->sharedVPConstants + program->resources[parameterResource->resource+j] * 4 * sizeof( float );
|
||||
rtParameter->pushBufferPointer = programPushBuffer++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else //if (profileIndex == FRAGMENT_PROFILE_INDEX)
|
||||
{
|
||||
if ( parameterEntry->flags & CGPF_SHARED )
|
||||
{
|
||||
// XXX needs an offset for the get
|
||||
rtParameter->pushBufferPointer = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
int registerStride = isMatrix(( CGtype )parameterResource->type ) ? rglGetTypeRowCount(( CGtype )parameterResource->type ) : 1;
|
||||
int registerCount = arrayCount * registerStride;
|
||||
rtParameter->pushBufferPointer = currentStorage;
|
||||
currentStorage += 4 * registerCount;
|
||||
}
|
||||
}
|
||||
|
||||
switch ( parameterResource->type )
|
||||
{
|
||||
@ -1588,9 +1353,6 @@ void *rglPlatformRasterInit (void)
|
||||
driver->flushBufferCount = 0;
|
||||
|
||||
// [YLIN] Make it 16 byte align
|
||||
driver->sharedVPConstants = (char*)memalign(16, 4 * sizeof( float ) * RGL_MAX_VP_SHARED_CONSTANTS);
|
||||
driver->sharedFPConstantsId = gmmAlloc((CellGcmContextData*)&rglGcmState_i.fifo,
|
||||
0, 4 * sizeof(float) * RGL_MAX_FP_SHARED_CONSTANTS);
|
||||
|
||||
return driver;
|
||||
}
|
||||
@ -1600,9 +1362,6 @@ void rglPlatformRasterExit (void *data)
|
||||
{
   /*
    * Releases the raster driver passed in `data`: the shared fragment
    * constant allocation, the shared vertex constant buffer, and finally
    * the driver structure itself. A NULL `data` is a no-op.
    */
   rglGcmDriver *driver = (rglGcmDriver*)data;

   // Check before touching any member: the members used to be read ahead
   // of the NULL test, which made that test useless.
   if (!driver)
      return;

   gmmFree( driver->sharedFPConstantsId );
   free( driver->sharedVPConstants );

   free(driver);
}
|
||||
|
Loading…
Reference in New Issue
Block a user