mirror of
https://github.com/libretro/pcsx2.git
synced 2024-11-25 02:09:52 +00:00
e3c741bb2a
* new memory management * asm was replaced by intrinsic * new GLSL backend (AMD only) Cmake is probably broken anyway with the 2 plugins... * and lots of others stuff that I forgot about it ;) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5166 96395faa-99c1-11dd-bbfe-3dabce05a288
565 lines
20 KiB
C++
565 lines
20 KiB
C++
/* ZeroGS KOSMOS
|
|
* Copyright (C) 2005-2006 zerofrog@gmail.com
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*/
|
|
|
|
#include "GS.h"
|
|
#include "Util.h"
|
|
#include "ZZoglMem.h"
|
|
#include "targets.h"
|
|
#include "x86.h"
|
|
|
|
#include "Mem_Swizzle.h"
|
|
|
|
#ifndef ZZNORMAL_MEMORY
|
|
|
|
bool allowed_psm[256] = {false, }; // Sometimes we got strange unknown psm
|
|
PSM_value PSM_value_Table[64] = {PSMT_BAD_PSM, }; // for int -> PSM_value
|
|
|
|
// return array of pointer of array string,
|
|
// We SHOULD do memory allocation for u32** -- otherwize we have a lot of trouble!
|
|
// if bw and bh are set correctly, as dimensions of table, than array have pointers
|
|
// to table rows, so array[i][j] = table[i][j];
|
|
inline u32** InitTable(int bh, int bw, u32* table) {
|
|
u32** array = (u32**)malloc(bh * sizeof(u32*));
|
|
for (int i = 0; i < bh; i++) {
|
|
array[i] = &table[i * bw];
|
|
}
|
|
return array;
|
|
}
|
|
|
|
// initialize dynamic arrays (u32**) for each regular psm.
|
|
inline void SetTable(int psm) {
|
|
switch (psm) {
|
|
case PSMCT32:
|
|
g_pageTable[psm] = InitTable( 32, 64, &g_pageTable32[0][0]);
|
|
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable32[0][0]);
|
|
g_columnTable[psm] = InitTable( 8, 8, &g_columnTable32[0][0]);
|
|
break;
|
|
|
|
case PSMCT24:
|
|
g_pageTable[psm] = g_pageTable[PSMCT32];;
|
|
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable32[0][0]);
|
|
g_columnTable[psm] = InitTable( 8, 8, &g_columnTable32[0][0]);
|
|
break;
|
|
|
|
case PSMCT16:
|
|
g_pageTable[psm] = InitTable( 64, 64, &g_pageTable16[0][0]);
|
|
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable16[0][0]);
|
|
g_columnTable[psm] = InitTable( 8, 16, &g_columnTable16[0][0]);
|
|
break;
|
|
|
|
case PSMCT16S:
|
|
g_pageTable[psm] = InitTable( 64, 64, &g_pageTable16S[0][0]);
|
|
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable16S[0][0]);
|
|
g_columnTable[psm] = InitTable( 8, 16, &g_columnTable16[0][0]);
|
|
break;
|
|
|
|
case PSMT8:
|
|
g_pageTable[psm] = InitTable( 64, 128, &g_pageTable8[0][0]);
|
|
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable8[0][0]);
|
|
g_columnTable[psm] = InitTable( 16, 16, &g_columnTable8[0][0]);
|
|
break;
|
|
|
|
case PSMT8H:
|
|
g_pageTable[psm] = g_pageTable[PSMCT32];
|
|
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable8[0][0]);
|
|
g_columnTable[psm] = InitTable( 16, 16, &g_columnTable8[0][0]);
|
|
break;
|
|
|
|
case PSMT4:
|
|
g_pageTable[psm] = InitTable(128, 128, &g_pageTable4[0][0]);
|
|
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable4[0][0]);
|
|
g_columnTable[psm] = InitTable( 16, 32, &g_columnTable4[0][0]);
|
|
break;
|
|
|
|
case PSMT4HL:
|
|
case PSMT4HH:
|
|
g_pageTable[psm] = g_pageTable[PSMCT32];
|
|
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable4[0][0]);
|
|
g_columnTable[psm] = InitTable( 16, 32, &g_columnTable4[0][0]);
|
|
break;
|
|
|
|
case PSMT32Z:
|
|
g_pageTable[psm] = InitTable( 32, 64, &g_pageTable32Z[0][0]);
|
|
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable32Z[0][0]);
|
|
g_columnTable[psm] = InitTable( 8, 8, &g_columnTable32[0][0]);
|
|
break;
|
|
|
|
case PSMT24Z:
|
|
g_pageTable[psm] = g_pageTable[PSMT32Z];
|
|
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable32Z[0][0]);
|
|
g_columnTable[psm] = InitTable( 8, 8, &g_columnTable32[0][0]);
|
|
break;
|
|
|
|
case PSMT16Z:
|
|
g_pageTable[psm] = InitTable( 64, 64, &g_pageTable16Z[0][0]);
|
|
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable16Z[0][0]);
|
|
g_columnTable[psm] = InitTable( 8, 16, &g_columnTable16[0][0]);
|
|
break;
|
|
|
|
case PSMT16SZ:
|
|
g_pageTable[psm] = InitTable( 64, 64, &g_pageTable16SZ[0][0]);
|
|
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable16SZ[0][0]);
|
|
g_columnTable[psm] = InitTable( 8, 16, &g_columnTable16[0][0]);
|
|
break;
|
|
}
|
|
}
|
|
|
|
// After this, the function arrays with u32** have memory set and filled.
|
|
void FillBlockTables() {
|
|
for (int i = 0; i < MAX_PSM; i++)
|
|
SetTable(i);
|
|
}
|
|
|
|
// Deallocate memory for u32** arrays.
|
|
void DestroyBlockTables() {
|
|
for (int i = 0; i < MAX_PSM; i++) {
|
|
if (g_pageTable[i] != NULL && (i != PSMT8H && i != PSMT4HL && i != PSMT4HH && i != PSMCT24 && i != PSMT24Z))
|
|
free(g_pageTable[i]);
|
|
|
|
if (g_blockTable[i] != NULL)
|
|
free(g_blockTable[i]);
|
|
|
|
if (g_columnTable[i] != NULL)
|
|
free(g_columnTable[i]);
|
|
}
|
|
}
|
|
|
|
void FillNewPageTable() {
|
|
int k = 0;
|
|
for (int psm = 0; psm < MAX_PSM; psm ++)
|
|
if (allowed_psm[psm]) {
|
|
for (u32 i = 0; i < 127; i++)
|
|
for(u32 j = 0; j < 127; j++) {
|
|
u32 address;
|
|
u32 shift;
|
|
|
|
address = g_pageTable[psm][i & ZZ_DT[psm][3]][j & ZZ_DT[psm][4]];
|
|
shift = (((address << ZZ_DT[psm][5]) & 0x7 ) << 3)+ ZZ_DT[psm][7]; // last part is for 8H, 4HL and 4HH -- they have data from 24 and 28 byte
|
|
g_pageTable2[k][i][j] = (address >> ZZ_DT[psm][0]) + (shift << 16); // now lower 16 byte of page table is 32-bit aligned address, and upper --
|
|
// shift.
|
|
}
|
|
g_pageTableNew[psm] = InitTable( 128, 128, &g_pageTable2[k][0][0]);
|
|
k++;;
|
|
}
|
|
}
|
|
|
|
BLOCK m_Blocks[MAX_PSM]; // Do so that blocks are indexable.
|
|
|
|
// At the begining and the end of each string we should made unaligned writes, with nSize checks. We should be sure that all
|
|
// these pixels are inside one widthlimit space.
|
|
template <int psm>
|
|
inline bool DoOneTransmitStep(void* pstart, int& nSize, int endj, const void* pbuf, int& k, int& i, int& j, int widthlimit) {
|
|
for (; j < endj && nSize > 0; j++, k++, nSize -= 1) {
|
|
writePixelMem<psm, false>((u32*)pstart, j%2048, i%2048, (u32*)(pbuf), k, gs.dstbuf.bw);
|
|
}
|
|
|
|
return (nSize == 0);
|
|
}
|
|
|
|
// FFX has PSMT8 transmit (starting intro -- sword and hairs).
|
|
// Persona 4 texts at start are PSMCT32 (and there is also PSMCT16 transmit somwhere after that).
|
|
// Tekken V has PSMCT24 and PSMT4 transfers
|
|
|
|
// This function transfers "Y" block pixels. I use little another code than Zerofrog. My code often uses widthmult != 1 addition (Zerofrog's code
|
|
// have an strict condition for fast path: width of transferred data should be widthlimit multiplied by j; EndY also should be multiplied. But
|
|
// the usual data block of 255 pixels becomes transfered by 1.
|
|
// I should check, maybe Unaligned_Start and Unaligned_End often == 0, and I could try a fastpath -- with this block off.
|
|
template <int psm, int widthlimit>
|
|
inline bool TRANSMIT_HOSTLOCAL_Y(u32* pbuf, int& nSize, u8* pstart, int endY, int& i, int& j, int& k) {
|
|
// if (psm != PSMT8 && psm != 0 && psm != PSMT4 && psm != PSMCT24)
|
|
// ERROR_LOG("This is usable function TRANSMIT_HOSTLOCAL_Y at ZZoglMem.cpp %d %d %d %d %d\n", psm, widthlimit, i, j, nSize);
|
|
|
|
int q = (gs.trxpos.dx - j) % widthlimit;
|
|
if (DoOneTransmitStep<psm>(pstart, nSize, q, pbuf, k, i, j, widthlimit)) return true; // After this j and dx are compatible by modyle of widthlimit
|
|
|
|
int Unaligned_Start = (gs.trxpos.dx % widthlimit == 0) ? 0 : widthlimit - gs.trxpos.dx % widthlimit; // gs.trpos.dx + Unaligned_Start is multiple of widthlimit
|
|
for (; i < endY; ++i) {
|
|
if (DoOneTransmitStep<psm>(pstart, nSize, j + Unaligned_Start, pbuf, k, i, j, widthlimit)) return true; // This operation made j % widthlimit == 0.
|
|
//assert (j % widthlimit != 0);
|
|
|
|
for (; j < gs.imageEnd.x - widthlimit + 1 && nSize >= widthlimit; j += widthlimit, nSize -= widthlimit) {
|
|
writePixelsFromMemory<psm, true, widthlimit>(pstart, pbuf, k, j % 2048, i % 2048, gs.dstbuf.bw);
|
|
}
|
|
|
|
assert ( gs.imageEnd.x - j < widthlimit || nSize < widthlimit);
|
|
if (DoOneTransmitStep<psm>(pstart, nSize, gs.imageEnd.x, pbuf, k, i, j, widthlimit)) return true; // There are 2 reasons for finish of previous for: 1) nSize < widthlimit
|
|
// 2) j > gs.imageEnd.x - widthlimit + 1. We would try to write pixels up do
|
|
// EndX, it's no more widthlimit pixels
|
|
j = gs.trxpos.dx;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// PSMT4 -- Tekken V
|
|
template <int psm, int widthlimit>
|
|
inline void TRANSMIT_HOSTLOCAL_X(u32* pbuf, int& nSize, u8* pstart, int& i, int& j, int& k, int blockheight, int startX, int pitch, int fracX) {
|
|
if (psm != PSMT8 && psm != PSMT4)
|
|
ZZLog::Error_Log("This is usable function TRANSMIT_HOSTLOCAL_X at ZZoglMem.cpp %d %d %d %d %d\n", psm, widthlimit, i, j, nSize);
|
|
|
|
for(int tempi = 0; tempi < blockheight; ++tempi) {
|
|
for(j = startX; j < gs.imageEnd.x; j++, k++) {
|
|
writePixelMem<psm, false>((u32*)pstart, j%2048, (i + tempi)%2048, (u32*)(pbuf), k, gs.dstbuf.bw);
|
|
}
|
|
k += ( pitch - fracX );
|
|
}
|
|
}
|
|
|
|
template <int psm>
|
|
inline int TRANSMIT_PITCH(int pitch) {
|
|
return (PSM_BITS_PER_PIXEL<psm>() * pitch) >> 3;
|
|
}
|
|
|
|
// ------------------------
|
|
// | Y |
|
|
// ------------------------
|
|
// | block | |
|
|
// | aligned area | X |
|
|
// | | |
|
|
// ------------------------
|
|
// | Y |
|
|
// ------------------------
|
|
|
|
|
|
template <int psmX>
|
|
int FinishTransfer(int i, int j, int nSize, int nLeftOver)
|
|
{
|
|
if( i >= gs.imageEnd.y )
|
|
{
|
|
assert( gs.transferring == false || i == gs.imageEnd.y );
|
|
gs.transferring = false;
|
|
}
|
|
else {
|
|
/* update new params */
|
|
gs.image.y = i;
|
|
gs.image.x = j;
|
|
}
|
|
|
|
return (nSize * TRANSMIT_PITCH<psmX>(2) + nLeftOver)/2;
|
|
}
|
|
|
|
template<int psmX, int widthlimit, int blockbits, int blockwidth, int blockheight>
|
|
int TransferHostLocal(const void* pbyMem, u32 nQWordSize)
|
|
{
|
|
assert( gs.imageTransfer == XFER_HOST_TO_LOCAL );
|
|
u8* pstart = g_pbyGSMemory + gs.dstbuf.bp*256;
|
|
|
|
int i = gs.image.y, j = gs.image.x;
|
|
|
|
const u8* pbuf = (const u8*)pbyMem;
|
|
int nLeftOver = (nQWordSize*4*2)%(TRANSMIT_PITCH<psmX>(2));
|
|
int nSize = nQWordSize*4*2/TRANSMIT_PITCH<psmX>(2);
|
|
nSize = min(nSize, gs.imageNew.w * gs.imageNew.h);
|
|
|
|
int pitch, area, fracX;
|
|
int endY = ROUND_UPPOW2(i, blockheight);
|
|
Point alignedPt;
|
|
|
|
alignedPt.x = ROUND_DOWNPOW2(gs.imageEnd.x, blockwidth);
|
|
alignedPt.y = ROUND_DOWNPOW2(gs.imageEnd.y, blockheight);
|
|
|
|
bool bAligned;
|
|
bool bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (j == gs.trxpos.dx) && (alignedPt.y > endY) && alignedPt.x > gs.trxpos.dx;
|
|
|
|
if( (gs.imageEnd.x - gs.trxpos.dx) % widthlimit ) {
|
|
/* hack */
|
|
int testwidth = (int)nSize - (gs.imageEnd.y - i) * (gs.imageEnd.x - gs.trxpos.dx) + (j - gs.trxpos.dx);
|
|
if((testwidth <= widthlimit) && (testwidth >= -widthlimit)) {
|
|
/* don't transfer */
|
|
/*ZZLog::Debug_Log("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEnd.x, nQWordSize);*/
|
|
gs.transferring = false;
|
|
}
|
|
bCanAlign = false;
|
|
}
|
|
|
|
/* first align on block boundary */
|
|
if( MOD_POW2(i, blockheight) || !bCanAlign ) {
|
|
|
|
if( !bCanAlign )
|
|
endY = gs.imageEnd.y; /* transfer the whole image */
|
|
else
|
|
assert( endY < gs.imageEnd.y); /* part of alignment condition */
|
|
|
|
int limit = widthlimit;
|
|
if (((gs.imageEnd.x - gs.trxpos.dx) % widthlimit) || ((gs.imageEnd.x - j) % widthlimit))
|
|
/* transmit with a width of 1 */
|
|
limit = 1 + (gs.dstbuf.psm == PSMT4);
|
|
/*TRANSMIT_HOSTLOCAL_Y##TransSfx(psm, T, limit, endY)*/
|
|
int k = 0;
|
|
|
|
if (TRANSMIT_HOSTLOCAL_Y<psmX, widthlimit>((u32*)pbuf, nSize, pstart, endY, i, j, k))
|
|
return FinishTransfer<psmX>(i, j, nSize, nLeftOver);
|
|
|
|
pbuf += TRANSMIT_PITCH<psmX>(k);
|
|
|
|
if (nSize == 0 || i == gs.imageEnd.y) return FinishTransfer<psmX>(i, j, nSize, nLeftOver);
|
|
}
|
|
|
|
assert( MOD_POW2(i, blockheight) == 0 && j == gs.trxpos.dx);
|
|
|
|
/* can align! */
|
|
pitch = gs.imageEnd.x - gs.trxpos.dx;
|
|
area = pitch * blockheight;
|
|
fracX = gs.imageEnd.x - alignedPt.x;
|
|
|
|
/* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */
|
|
bAligned = !((uptr)pbuf & 0xf) && ((TRANSMIT_PITCH<psmX>(pitch)&0xf) == 0);
|
|
|
|
/* transfer aligning to blocks */
|
|
for(; i < alignedPt.y && nSize >= area; i += blockheight, nSize -= area) {
|
|
|
|
for(int tempj = gs.trxpos.dx; tempj < alignedPt.x; tempj += blockwidth, pbuf += TRANSMIT_PITCH<psmX>(blockwidth)) {
|
|
SwizzleBlock<psmX>((u32*)(pstart + getPixelAddress<psmX>(tempj, i, gs.dstbuf.bw)*blockbits/8),
|
|
(u32*)pbuf, TRANSMIT_PITCH<psmX>(pitch));
|
|
}
|
|
|
|
/* transfer the rest */
|
|
if( alignedPt.x < gs.imageEnd.x ) {
|
|
int k = 0;
|
|
TRANSMIT_HOSTLOCAL_X<psmX, widthlimit>((u32*)pbuf, nSize, pstart, i, j, k, blockheight, alignedPt.x, pitch, fracX);
|
|
pbuf += TRANSMIT_PITCH<psmX>(k - alignedPt.x + gs.trxpos.dx);
|
|
}
|
|
else pbuf += (blockheight-1)*TRANSMIT_PITCH<psmX>(pitch);
|
|
j = gs.trxpos.dx;
|
|
}
|
|
|
|
if( TRANSMIT_PITCH<psmX>(nSize)/4 > 0 ) {
|
|
int k = 0;
|
|
TRANSMIT_HOSTLOCAL_Y<psmX, widthlimit>((u32*)pbuf, nSize, pstart, gs.imageEnd.y, i, j, k);
|
|
pbuf += TRANSMIT_PITCH<psmX>(k);
|
|
/* sometimes wrong sizes are sent (tekken tag) */
|
|
assert( gs.transferring == false || TRANSMIT_PITCH<psmX>(nSize)/4 <= 2 );
|
|
}
|
|
|
|
return FinishTransfer<psmX>(i, j, nSize, nLeftOver);
|
|
}
|
|
|
|
inline int TransferHostLocal32(const void* pbyMem, u32 nQWordSize)
|
|
{
|
|
return TransferHostLocal<PSMCT32, 2, 32, 8, 8>( pbyMem, nQWordSize);
|
|
}
|
|
|
|
inline int TransferHostLocal32Z(const void* pbyMem, u32 nQWordSize)
|
|
{
|
|
return TransferHostLocal<PSMT32Z, 2, 32, 8, 8>( pbyMem, nQWordSize);
|
|
}
|
|
|
|
inline int TransferHostLocal24(const void* pbyMem, u32 nQWordSize)
|
|
{
|
|
return TransferHostLocal<PSMCT24, 8, 32, 8, 8>( pbyMem, nQWordSize);
|
|
}
|
|
|
|
inline int TransferHostLocal24Z(const void* pbyMem, u32 nQWordSize)
|
|
{
|
|
return TransferHostLocal<PSMT24Z, 8, 32, 8, 8>( pbyMem, nQWordSize);
|
|
}
|
|
|
|
inline int TransferHostLocal16(const void* pbyMem, u32 nQWordSize)
|
|
{
|
|
return TransferHostLocal<PSMCT16, 4, 16, 16, 8>( pbyMem, nQWordSize);
|
|
}
|
|
|
|
inline int TransferHostLocal16S(const void* pbyMem, u32 nQWordSize)
|
|
{
|
|
return TransferHostLocal<PSMCT16S, 4, 16, 16, 8>( pbyMem, nQWordSize);
|
|
}
|
|
|
|
inline int TransferHostLocal16Z(const void* pbyMem, u32 nQWordSize)
|
|
{
|
|
return TransferHostLocal<PSMT16Z, 4, 16, 16, 8>( pbyMem, nQWordSize);
|
|
}
|
|
|
|
inline int TransferHostLocal16SZ(const void* pbyMem, u32 nQWordSize)
|
|
{
|
|
return TransferHostLocal<PSMT16SZ, 4, 16, 16, 8>( pbyMem, nQWordSize);
|
|
}
|
|
|
|
inline int TransferHostLocal8(const void* pbyMem, u32 nQWordSize)
|
|
{
|
|
return TransferHostLocal<PSMT8, 4, 8, 16, 16>( pbyMem, nQWordSize);
|
|
}
|
|
|
|
inline int TransferHostLocal4(const void* pbyMem, u32 nQWordSize)
|
|
{
|
|
return TransferHostLocal<PSMT4, 8, 4, 32, 16>( pbyMem, nQWordSize);
|
|
}
|
|
|
|
inline int TransferHostLocal8H(const void* pbyMem, u32 nQWordSize)
|
|
{
|
|
return TransferHostLocal<PSMT8H, 4, 32, 8, 8>( pbyMem, nQWordSize);
|
|
}
|
|
|
|
inline int TransferHostLocal4HL(const void* pbyMem, u32 nQWordSize)
|
|
{
|
|
return TransferHostLocal<PSMT4HL, 8, 32, 8, 8>( pbyMem, nQWordSize);
|
|
}
|
|
|
|
inline int TransferHostLocal4HH(const void* pbyMem, u32 nQWordSize)
|
|
{
|
|
return TransferHostLocal<PSMT4HH, 8, 32, 8, 8>( pbyMem, nQWordSize);
|
|
}
|
|
|
|
void TransferLocalHost32(void* pbyMem, u32 nQWordSize) { FUNCLOG }
|
|
void TransferLocalHost24(void* pbyMem, u32 nQWordSize) { FUNCLOG }
|
|
void TransferLocalHost16(void* pbyMem, u32 nQWordSize) { FUNCLOG }
|
|
void TransferLocalHost16S(void* pbyMem, u32 nQWordSize) { FUNCLOG }
|
|
void TransferLocalHost8(void* pbyMem, u32 nQWordSize) { FUNCLOG }
|
|
void TransferLocalHost4(void* pbyMem, u32 nQWordSize) { FUNCLOG }
|
|
void TransferLocalHost8H(void* pbyMem, u32 nQWordSize) { FUNCLOG }
|
|
void TransferLocalHost4HL(void* pbyMem, u32 nQWordSize) { FUNCLOG }
|
|
void TransferLocalHost4HH(void* pbyMem, u32 nQWordSize) { FUNCLOG }
|
|
void TransferLocalHost32Z(void* pbyMem, u32 nQWordSize) { FUNCLOG }
|
|
void TransferLocalHost24Z(void* pbyMem, u32 nQWordSize) { FUNCLOG }
|
|
void TransferLocalHost16Z(void* pbyMem, u32 nQWordSize) { FUNCLOG }
|
|
void TransferLocalHost16SZ(void* pbyMem, u32 nQWordSize) { FUNCLOG }
|
|
|
|
inline void FILL_BLOCK(BLOCK& b, int floatfmt, vector<char>& vBlockData, vector<char>& vBilinearData, int ox, int oy, int psmX) {
|
|
int bw = ZZ_DT[psmX][4] + 1;
|
|
int bh = ZZ_DT[psmX][3] + 1;
|
|
int mult = 1 << ZZ_DT[psmX][0];
|
|
|
|
b.vTexDims = float4 (BLOCK_TEXWIDTH/(float)(bw), BLOCK_TEXHEIGHT/(float)(bh), 0, 0);
|
|
b.vTexBlock = float4( (float)bw/BLOCK_TEXWIDTH, (float)bh/BLOCK_TEXHEIGHT, ((float)ox+0.2f)/BLOCK_TEXWIDTH, ((float)oy+0.05f)/BLOCK_TEXHEIGHT);
|
|
b.width = bw;
|
|
b.height = bh;
|
|
b.colwidth = bh / 4;
|
|
b.colheight = bw / 8;
|
|
b.bpp = 32/mult;
|
|
|
|
b.pageTable = g_pageTable[psmX];
|
|
b.blockTable = g_blockTable[psmX];
|
|
b.columnTable = g_columnTable[psmX];
|
|
|
|
// This is never true.
|
|
//assert( sizeof(g_pageTable[psmX]) == bw*bh*sizeof(g_pageTable[psmX][0][0]) );
|
|
float* psrcf = (float*)&vBlockData[0] + ox + oy * BLOCK_TEXWIDTH;
|
|
u16* psrcw = (u16*)&vBlockData[0] + ox + oy * BLOCK_TEXWIDTH;
|
|
for(int i = 0; i < bh; ++i) {
|
|
for(int j = 0; j < bw; ++j) {
|
|
/* fill the table */
|
|
u32 u = g_blockTable[psmX][(i / b.colheight)][(j / b.colwidth)] * 64 * mult + g_columnTable[psmX][i%b.colheight][j%b.colwidth];
|
|
b.pageTable[i][j] = u;
|
|
if( floatfmt ) {
|
|
psrcf[i*BLOCK_TEXWIDTH+j] = (float)(u) / (float)(GPU_TEXWIDTH*mult);
|
|
}
|
|
else {
|
|
psrcw[i*BLOCK_TEXWIDTH+j] = u;
|
|
}
|
|
}
|
|
}
|
|
|
|
if( floatfmt ) {
|
|
float4* psrcv = (float4*)&vBilinearData[0] + ox + oy * BLOCK_TEXWIDTH;
|
|
for(int i = 0; i < bh; ++i) {
|
|
for(int j = 0; j < bw; ++j) {
|
|
float4* pv = &psrcv[i*BLOCK_TEXWIDTH+j];
|
|
pv->x = psrcf[i*BLOCK_TEXWIDTH+j];
|
|
pv->y = psrcf[i*BLOCK_TEXWIDTH+((j+1)%bw)];
|
|
pv->z = psrcf[((i+1)%bh)*BLOCK_TEXWIDTH+j];
|
|
pv->w = psrcf[((i+1)%bh)*BLOCK_TEXWIDTH+((j+1)%bw)];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void BLOCK::FillBlocks(vector<char>& vBlockData, vector<char>& vBilinearData, int floatfmt)
|
|
{
|
|
FUNCLOG
|
|
vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * (floatfmt ? 4 : 2));
|
|
|
|
if (floatfmt)
|
|
vBilinearData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * sizeof(float4));
|
|
|
|
BLOCK b;
|
|
|
|
memset(m_Blocks, 0, sizeof(m_Blocks));
|
|
|
|
// 32
|
|
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 0, 0, PSMCT32);
|
|
b.TransferHostLocal = TransferHostLocal32;
|
|
b.TransferLocalHost = TransferLocalHost32;
|
|
m_Blocks[PSMCT32] = b;
|
|
|
|
// 24 (same as 32 except write/readPixel are different)
|
|
b.TransferHostLocal = TransferHostLocal24;
|
|
b.TransferLocalHost = TransferLocalHost24;
|
|
m_Blocks[PSMCT24] = b;
|
|
|
|
// 8H (same as 32 except write/readPixel are different)
|
|
b.TransferHostLocal = TransferHostLocal8H;
|
|
b.TransferLocalHost = TransferLocalHost8H;
|
|
m_Blocks[PSMT8H] = b;
|
|
|
|
b.TransferHostLocal = TransferHostLocal4HL;
|
|
b.TransferLocalHost = TransferLocalHost4HL;
|
|
m_Blocks[PSMT4HL] = b;
|
|
|
|
b.TransferHostLocal = TransferHostLocal4HH;
|
|
b.TransferLocalHost = TransferLocalHost4HH;
|
|
m_Blocks[PSMT4HH] = b;
|
|
|
|
// 32z
|
|
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 64, 0, PSMT32Z);
|
|
b.TransferHostLocal = TransferHostLocal32Z;
|
|
b.TransferLocalHost = TransferLocalHost32Z;
|
|
m_Blocks[PSMT32Z] = b;
|
|
|
|
// 24Z (same as 32Z except write/readPixel are different)
|
|
b.TransferHostLocal = TransferHostLocal24Z;
|
|
b.TransferLocalHost = TransferLocalHost24Z;
|
|
m_Blocks[PSMT24Z] = b;
|
|
|
|
// 16
|
|
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 0, 32, PSMCT16);
|
|
b.TransferHostLocal = TransferHostLocal16;
|
|
b.TransferLocalHost = TransferLocalHost16;
|
|
m_Blocks[PSMCT16] = b;
|
|
|
|
// 16s
|
|
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 64, 32, PSMCT16S);
|
|
b.TransferHostLocal = TransferHostLocal16S;
|
|
b.TransferLocalHost = TransferLocalHost16S;
|
|
m_Blocks[PSMCT16S] = b;
|
|
|
|
// 16z
|
|
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 0, 96, PSMT16Z);
|
|
b.TransferHostLocal = TransferHostLocal16Z;
|
|
b.TransferLocalHost = TransferLocalHost16Z;
|
|
m_Blocks[PSMT16Z] = b;
|
|
|
|
// 16sz
|
|
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 64, 96, PSMT16SZ);
|
|
b.TransferHostLocal = TransferHostLocal16SZ;
|
|
b.TransferLocalHost = TransferLocalHost16SZ;
|
|
m_Blocks[PSMT16SZ] = b;
|
|
|
|
// 8
|
|
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 0, 160, PSMT8);
|
|
b.TransferHostLocal = TransferHostLocal8;
|
|
b.TransferLocalHost = TransferLocalHost8;
|
|
m_Blocks[PSMT8] = b;
|
|
|
|
// 4
|
|
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 0, 224, PSMT4);
|
|
b.TransferHostLocal = TransferHostLocal4;
|
|
b.TransferLocalHost = TransferLocalHost4;
|
|
m_Blocks[PSMT4] = b;
|
|
}
|
|
|
|
#endif
|