GregMiscellaneous: sync and refresh the branch (3728:3768)

git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3769 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut@gmail.com 2010-09-15 16:54:19 +00:00
parent 59bccbccb4
commit 888a309e1a
75 changed files with 1562 additions and 1798 deletions

View File

@ -99,7 +99,7 @@ if(DEFINED USER_CMAKE_C_FLAGS)
string(STRIP "${USER_CMAKE_C_FLAGS}" CMAKE_C_FLAGS)
endif(DEFINED USER_CMAKE_C_FLAGS)
# Use some default machine flags
string(STRIP "${CMAKE_C_FLAGS} -m32 -msse -msse2 -march=i686" CMAKE_C_FLAGS)
string(STRIP "${CMAKE_C_FLAGS} -m32 -msse -msse2 -march=i686 -pthread" CMAKE_C_FLAGS)
### C++ flags
@ -110,7 +110,7 @@ if(DEFINED USER_CMAKE_CXX_FLAGS)
string(STRIP "${USER_CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS)
endif(DEFINED USER_CMAKE_CXX_FLAGS)
# Use some default machine flags
string(STRIP "${CMAKE_CXX_FLAGS} -m32 -msse -msse2 -march=i686" CMAKE_CXX_FLAGS)
string(STRIP "${CMAKE_CXX_FLAGS} -m32 -msse -msse2 -march=i686 -pthread" CMAKE_CXX_FLAGS)
#-------------------------------------------------------------------------------
# Select library system vs 3rdparty

View File

@ -21,7 +21,6 @@ Build-Depends: debhelper (>= 7.0.50), dpkg-dev (>= 1.15.5.6), cmake (>=2.8),
libsoundtouch1-dev (>= 1.3),
libsparsehash-dev (>= 1.6),
libx11-dev,
libxxf86vm-dev,
libglew1.5-dev (>= 1.5.1),
libgl1-mesa-dev,
libglu1-mesa-dev,

View File

@ -19,7 +19,6 @@ Build-Depends: debhelper (>= 7.0.50), dpkg-dev (>= 1.15.5.6), cmake (>=2.8),
libsoundtouch1-dev (>= 1.3),
libsparsehash-dev (>= 1.6),
libx11-dev,
libxxf86vm-dev,
libglew1.5-dev (>= 1.5.1),
libgl1-mesa-dev,
libglu1-mesa-dev,

View File

@ -77,6 +77,10 @@ override_dh_strip:
dh_strip --package=pcsx2-unstable --dbg-package=pcsx2-unstable-dbg
dh_strip --package=pcsx2-plugins-unstable --dbg-package=pcsx2-plugins-unstable-dbg
# Avoid launching the compilation twice (from both the build and dh_auto_build targets).
override_dh_auto_build:
# Do nothing
%:
dh $@ --parallel

View File

@ -77,6 +77,10 @@ override_dh_strip:
dh_strip --package=pcsx2-unstable --dbg-package=pcsx2-unstable-dbg
dh_strip --package=pcsx2-plugins-unstable --dbg-package=pcsx2-plugins-unstable-dbg
# Avoid launching the compilation twice (from both the build and dh_auto_build targets).
override_dh_auto_build:
# Do nothing
%:
dh $@ --parallel

View File

@ -14,6 +14,7 @@ set(CommonFlags
-fno-dse
-fno-tree-dse
-fno-strict-aliasing
-Wstrict-aliasing # Allow to track strict aliasing issue.
-pipe
-Wno-format
-Wno-unused-parameter
@ -225,7 +226,6 @@ set(pcsx2Headers
Vif_Dma.h
Vif.h
Vif_Unpack.h
Vif_Unpack.inl
vtlb.h
VUflags.h
VUmicro.h

View File

@ -37,7 +37,8 @@ __ri void UpdateCP0Status() {
void __fastcall WriteCP0Status(u32 value) {
cpuRegs.CP0.n.Status.val = value;
UpdateCP0Status();
cpuUpdateOperationMode();
cpuSetNextEventDelta(4);
}
void MapTLB(int i)
@ -532,7 +533,8 @@ void ERET() {
cpuRegs.pc = cpuRegs.CP0.n.EPC;
cpuRegs.CP0.n.Status.b.EXL = 0;
}
UpdateCP0Status();
cpuUpdateOperationMode();
cpuSetNextEventDelta(4);
intSetBranch();
}
@ -540,7 +542,8 @@ void DI() {
if (cpuRegs.CP0.n.Status.b._EDI || cpuRegs.CP0.n.Status.b.EXL ||
cpuRegs.CP0.n.Status.b.ERL || (cpuRegs.CP0.n.Status.b.KSU == 0)) {
cpuRegs.CP0.n.Status.b.EIE = 0;
//UpdateCP0Status(); // ints are disabled so checking for them is kinda silly...
// IRQs are disabled so no need to do a cpu exception/event test...
//cpuSetNextEventDelta();
}
}
@ -548,7 +551,8 @@ void EI() {
if (cpuRegs.CP0.n.Status.b._EDI || cpuRegs.CP0.n.Status.b.EXL ||
cpuRegs.CP0.n.Status.b.ERL || (cpuRegs.CP0.n.Status.b.KSU == 0)) {
cpuRegs.CP0.n.Status.b.EIE = 1;
UpdateCP0Status();
// schedule an event test, which will check for and raise pending IRQs.
cpuSetNextEventDelta(4);
}
}

View File

@ -17,7 +17,7 @@
#define __COP0_H__
extern void __fastcall WriteCP0Status(u32 value);
extern void UpdateCP0Status();
extern void cpuUpdateOperationMode();
extern void WriteTLB(int i);
extern void UnmapTLB(int i);
extern void MapTLB(int i);

View File

@ -93,7 +93,7 @@ static __fi void _rcntSet( int cntidx )
if (c < nextCounter)
{
nextCounter = c;
cpuSetNextBranch( nextsCounter, nextCounter ); //Need to update on counter resets/target changes
cpuSetNextEvent( nextsCounter, nextCounter ); //Need to update on counter resets/target changes
}
// Ignore target diff if target is currently disabled.
@ -111,7 +111,7 @@ static __fi void _rcntSet( int cntidx )
if (c < nextCounter)
{
nextCounter = c;
cpuSetNextBranch( nextsCounter, nextCounter ); //Need to update on counter resets/target changes
cpuSetNextEvent( nextsCounter, nextCounter ); //Need to update on counter resets/target changes
}
}
}
@ -419,7 +419,7 @@ __fi void rcntUpdate_hScanline()
{
if( !cpuTestCycle( hsyncCounter.sCycle, hsyncCounter.CycleT ) ) return;
//iopBranchAction = 1;
//iopEventAction = 1;
if (hsyncCounter.Mode & MODE_HBLANK) { //HBLANK Start
rcntStartGate(false, hsyncCounter.sCycle);
psxCheckStartGate16(0);
@ -890,6 +890,6 @@ void SaveStateBase::rcntFreeze()
for( int i=0; i<4; i++ )
_rcntSetGate( i );
iopBranchAction = 1; // probably not needed but won't hurt anything either.
iopEventAction = 1; // probably not needed but won't hurt anything either.
}
}

View File

@ -55,7 +55,7 @@ void iDumpPsxRegisters(u32 startpc, u32 temp)
for(i = 0; i < 34; i+=2) __Log("%spsx%s: %x %x", pstr, disRNameGPR[i], psxRegs.GPR.r[i], psxRegs.GPR.r[i+1]);
DbgCon.WriteLn("%scycle: %x %x %x; counters %x %x", pstr, psxRegs.cycle, g_psxNextBranchCycle, EEsCycle,
DbgCon.WriteLn("%scycle: %x %x %x; counters %x %x", pstr, psxRegs.cycle, g_iopNextEventCycle, EEsCycle,
psxNextsCounter, psxNextCounter);
DbgCon.WriteLn(wxsFormat(L"psxdma%d ", 2) + hw_dma(2).desc());
@ -109,7 +109,7 @@ void iDumpRegisters(u32 startpc, u32 temp)
__Log("%svfACC: %x %x %x %x", pstr, VU0.ACC.UL[3], VU0.ACC.UL[2], VU0.ACC.UL[1], VU0.ACC.UL[0]);
__Log("%sLO: %x_%x_%x_%x, HI: %x_%x_%x_%x", pstr, cpuRegs.LO.UL[3], cpuRegs.LO.UL[2], cpuRegs.LO.UL[1], cpuRegs.LO.UL[0],
cpuRegs.HI.UL[3], cpuRegs.HI.UL[2], cpuRegs.HI.UL[1], cpuRegs.HI.UL[0]);
__Log("%sCycle: %x %x, Count: %x", pstr, cpuRegs.cycle, g_nextBranchCycle, cpuRegs.CP0.n.Count);
__Log("%sCycle: %x %x, Count: %x", pstr, cpuRegs.cycle, g_nextEventCycle, cpuRegs.CP0.n.Count);
iDumpPsxRegisters(psxRegs.pc, temp);

View File

@ -53,7 +53,7 @@ void __fastcall ReadFIFO_VIF1(mem128_t* out)
if (vif1Regs.stat.FQC > 0)
{
GetMTGS().WaitGS();
GSreadFIFO(&psHu64(VIF1_FIFO));
GSreadFIFO((u64*)out);
vif1.GSLastDownloadSize--;
if (vif1.GSLastDownloadSize <= 16)
gifRegs.stat.OPH = false;
@ -61,7 +61,6 @@ void __fastcall ReadFIFO_VIF1(mem128_t* out)
}
}
CopyQWC( out, &psHu128(VIF1_FIFO) );
VIF_LOG("ReadFIFO/VIF1 -> %ls", out->ToString().c_str());
}
@ -72,8 +71,6 @@ void __fastcall WriteFIFO_VIF0(const mem128_t *value)
{
VIF_LOG("WriteFIFO/VIF0 <- %ls", value->ToString().c_str());
CopyQWC(&psHu128(VIF0_FIFO), value);
vif0ch.qwc += 1;
if(vif0.irqoffset != 0 && vif0.vifstalled == true) DevCon.Warning("Offset on VIF0 FIFO start!");
bool ret = VIF0transfer((u32*)value, 4);
@ -94,8 +91,6 @@ void __fastcall WriteFIFO_VIF1(const mem128_t *value)
{
VIF_LOG("WriteFIFO/VIF1 <- %ls", value->ToString().c_str());
CopyQWC(&psHu128(VIF1_FIFO), value);
if (vif1Regs.stat.FDR)
DevCon.Warning("writing to fifo when fdr is set!");
if (vif1Regs.stat.test(VIF1_STAT_INT | VIF1_STAT_VSS | VIF1_STAT_VIS | VIF1_STAT_VFS) )
@ -123,18 +118,15 @@ void __fastcall WriteFIFO_VIF1(const mem128_t *value)
pxAssertDev( ret, "vif stall code not implemented" );
}
// Dummy GIF-TAG Packet to Guarantee Count = 1
__aligned16 u128 nloop0_packet;
void __fastcall WriteFIFO_GIF(const mem128_t *value)
{
GIF_LOG("WriteFIFO/GIF <- %ls", value->ToString().c_str());
CopyQWC(&psHu128(GIF_FIFO), value);
CopyQWC(&nloop0_packet, value);
//CopyQWC(&psHu128(GIF_FIFO), value);
//CopyQWC(&nloop0_packet, value);
GetMTGS().PrepDataPacket(GIF_PATH_3, 1);
GIFPath_CopyTag( GIF_PATH_3, &nloop0_packet, 1 );
GIFPath_CopyTag( GIF_PATH_3, value, 1 );
GetMTGS().SendDataPacket();
if(GSTransferStatus.PTH3 == STOPPED_MODE && gifRegs.stat.APATH == GIF_APATH3 )
{

View File

@ -27,8 +27,8 @@ static __fi void IntCHackCheck()
{
// Sanity check: To protect from accidentally "rewinding" the cyclecount
// on the few times nextBranchCycle can be behind our current cycle.
s32 diff = g_nextBranchCycle - cpuRegs.cycle;
if( diff > 0 ) cpuRegs.cycle = g_nextBranchCycle;
s32 diff = g_nextEventCycle - cpuRegs.cycle;
if( diff > 0 ) cpuRegs.cycle = g_nextEventCycle;
}
static const uint HwF_VerboseConLog = 1<<0;
@ -48,7 +48,15 @@ mem32_t __fastcall _hwRead32(u32 mem)
case 0x02: return ipuRead32( mem );
case 0x03: return dmacRead32<0x03>( mem );
case 0x03:
if (mem >= EEMemoryMap::VIF0_Start)
{
if(mem >= EEMemoryMap::VIF1_Start)
return vifRead32<1>(mem);
else
return vifRead32<0>(mem);
}
return dmacRead32<0x03>( mem );
case 0x04:
case 0x05:
@ -65,7 +73,7 @@ mem32_t __fastcall _hwRead32(u32 mem)
DevCon.WriteLn( Color_Cyan, "Reading 32-bit FIFO data" );
u128 out128;
_hwRead128<page>(mem, &out128);
_hwRead128<page>(mem & ~0x0f, &out128);
return out128._u32[(mem >> 2) & 0x3];
}
break;
@ -221,7 +229,7 @@ static void _hwRead64(u32 mem, mem64_t* result )
DevCon.WriteLn( Color_Cyan, "Reading 64-bit FIFO data (%s 64 bits discarded)", wordpart ? "upper" : "lower" );
u128 out128;
_hwRead128<page>(mem, &out128);
_hwRead128<page>(mem & ~0x0f, &out128);
*result = out128._u64[wordpart];
}
return;

View File

@ -68,7 +68,7 @@ void __fastcall _hwWrite32( u32 mem, u32 value )
zerofill._u32[(mem >> 2) & 0x03] = value;
DevCon.WriteLn( Color_Cyan, "Writing 32-bit FIFO data (zero-extended to 128 bits)" );
_hwWrite128<page>(mem, &zerofill);
_hwWrite128<page>(mem & ~0x0f, &zerofill);
}
return;
@ -301,7 +301,7 @@ void __fastcall _hwWrite64( u32 mem, const mem64_t* srcval )
u128 zerofill = u128::From32(0);
zerofill._u64[(mem >> 3) & 0x01] = *srcval;
hwWrite128<page>(mem, &zerofill);
hwWrite128<page>(mem & ~0x0f, &zerofill);
}
return;

View File

@ -381,7 +381,10 @@ __fi void dmaIPU0() // fromIPU
ipu0dma.chcr.STR = false;
hwDmacIrq(DMAC_FROM_IPU);
}
IPUProcessInterrupt();
//IPUProcessInterrupt();
extern void IPUWorker();
if (ipuRegs.ctrl.BUSY) IPUWorker();
}
__fi void dmaIPU1() // toIPU

View File

@ -375,7 +375,7 @@ static void intReset()
static void intEventTest()
{
// Perform counters, ints, and IOP updates:
_cpuBranchTest_Shared();
_cpuEventTest_Shared();
}
static void intExecute()

View File

@ -391,7 +391,7 @@ void psxRcntUpdate()
int i;
//u32 change = 0;
g_psxNextBranchCycle = psxRegs.cycle + 32;
g_iopNextEventCycle = psxRegs.cycle + 32;
psxNextCounter = 0x7fffffff;
psxNextsCounter = psxRegs.cycle;

View File

@ -51,10 +51,10 @@ static void __fastcall psxDmaGeneric(u32 madr, u32 bcr, u32 chcr, u32 spuCore, _
if (psxCounters[6].CycleT < psxNextCounter)
psxNextCounter = psxCounters[6].CycleT;
if((g_psxNextBranchCycle - psxNextsCounter) > (u32)psxNextCounter)
if((g_iopNextEventCycle - psxNextsCounter) > (u32)psxNextCounter)
{
//DevCon.Warning("SPU2async Setting new counter branch, old %x new %x ((%x - %x = %x) > %x delta)", g_psxNextBranchCycle, psxNextsCounter + psxNextCounter, g_psxNextBranchCycle, psxNextsCounter, (g_psxNextBranchCycle - psxNextsCounter), psxNextCounter);
g_psxNextBranchCycle = psxNextsCounter + psxNextCounter;
//DevCon.Warning("SPU2async Setting new counter branch, old %x new %x ((%x - %x = %x) > %x delta)", g_iopNextEventCycle, psxNextsCounter + psxNextCounter, g_iopNextEventCycle, psxNextsCounter, (g_iopNextEventCycle - psxNextsCounter), psxNextCounter);
g_iopNextEventCycle = psxNextsCounter + psxNextCounter;
}
}

View File

@ -394,7 +394,6 @@
<Unit filename="../Vif_Transfer.cpp" />
<Unit filename="../Vif_Unpack.cpp" />
<Unit filename="../Vif_Unpack.h" />
<Unit filename="../Vif_Unpack.inl" />
<Unit filename="../ZipTools/ThreadedZipTools.h" />
<Unit filename="../ZipTools/thread_gzip.cpp" />
<Unit filename="../ZipTools/thread_lzma.cpp" />

View File

@ -29,22 +29,22 @@ u32 g_psxConstRegs[32];
u32 g_psxHasConstReg, g_psxFlushedConstReg;
// Controls when branch tests are performed.
u32 g_psxNextBranchCycle = 0;
u32 g_iopNextEventCycle = 0;
// This value is used when the IOP execution is broken to return control to the EE.
// (which happens when the IOP throws EE-bound interrupts). It holds the value of
// psxCycleEE (which is set to zero to facilitate the code break), so that the unrun
// iopCycleEE (which is set to zero to facilitate the code break), so that the unrun
// cycles can be accounted for later.
s32 psxBreak = 0;
s32 iopBreak = 0;
// tracks the IOP's current sync status with the EE. When it dips below zero,
// control is returned to the EE.
s32 psxCycleEE = -1;
s32 iopCycleEE = -1;
// Used to signal to the EE when important actions that need IOP-attention have
// happened (hsyncs, vsyncs, IOP exceptions, etc). IOP runs code whenever this
// is true, even if it's already running ahead a bit.
bool iopBranchAction = false;
bool iopEventAction = false;
bool iopEventTestIsActive = false;
@ -58,9 +58,9 @@ void psxReset()
psxRegs.CP0.n.Status = 0x10900000; // COP0 enabled | BEV = 1 | TS = 1
psxRegs.CP0.n.PRid = 0x0000001f; // PRevID = Revision ID, same as the IOP R3000A
psxBreak = 0;
psxCycleEE = -1;
g_psxNextBranchCycle = psxRegs.cycle + 4;
iopBreak = 0;
iopCycleEE = -1;
g_iopNextEventCycle = psxRegs.cycle + 4;
psxHwReset();
@ -113,8 +113,8 @@ __fi void psxSetNextBranch( u32 startCycle, s32 delta )
// typecast the conditional to signed so that things don't blow up
// if startCycle is greater than our next branch cycle.
if( (int)(g_psxNextBranchCycle - startCycle) > delta )
g_psxNextBranchCycle = startCycle + delta;
if( (int)(g_iopNextEventCycle - startCycle) > delta )
g_iopNextEventCycle = startCycle + delta;
}
__fi void psxSetNextBranchDelta( s32 delta )
@ -151,13 +151,13 @@ __fi void PSX_INT( IopEventId n, s32 ecycle )
psxSetNextBranchDelta( ecycle );
if( psxCycleEE < 0 )
if( iopCycleEE < 0 )
{
// The EE called this int, so inform it to branch as needed:
// fixme - this doesn't take into account EE/IOP sync (the IOP may be running
// ahead or behind the EE as per the EEsCycles value)
s32 iopDelta = (g_psxNextBranchCycle-psxRegs.cycle)*8;
cpuSetNextBranchDelta( iopDelta );
s32 iopDelta = (g_iopNextEventCycle-psxRegs.cycle)*8;
cpuSetNextEventDelta( iopDelta );
}
}
@ -211,18 +211,18 @@ static __fi void _psxTestInterrupts()
}
}
__ri void psxBranchTest()
__ri void iopEventTest()
{
if( psxTestCycle( psxNextsCounter, psxNextCounter ) )
{
psxRcntUpdate();
iopBranchAction = true;
iopEventAction = true;
}
else
{
// start the next branch at the next counter event by default
// the interrupt code below will assign nearer branches if needed.
g_psxNextBranchCycle = psxNextsCounter+psxNextCounter;
g_iopNextEventCycle = psxNextsCounter+psxNextCounter;
}
@ -239,7 +239,7 @@ __ri void psxBranchTest()
{
PSXCPU_LOG("Interrupt: %x %x", psxHu32(0x1070), psxHu32(0x1074));
psxException(0, 0);
iopBranchAction = true;
iopEventAction = true;
// No need to execute the SIFhack after cpuExceptions, since these by nature break SIF's
// thread sleep hangs and allow the IOP to "come back to life."
@ -258,9 +258,9 @@ void iopTestIntc()
// An iop exception has occurred while the EE is running code.
// Inform the EE to branch so the IOP can handle it promptly:
cpuSetNextBranchDelta( 16 );
iopBranchAction = true;
//Console.Error( "** IOP Needs an EE EventText, kthx ** %d", psxCycleEE );
cpuSetNextEventDelta( 16 );
iopEventAction = true;
//Console.Error( "** IOP Needs an EE EventText, kthx ** %d", iopCycleEE );
// Note: No need to set the iop's branch delta here, since the EE
// will run an IOP branch test regardless.

View File

@ -117,9 +117,9 @@ struct psxRegisters {
extern __aligned16 psxRegisters psxRegs;
extern u32 g_psxNextBranchCycle;
extern s32 psxBreak; // used when the IOP execution is broken and control returned to the EE
extern s32 psxCycleEE; // tracks IOP's current sych status with the EE
extern u32 g_iopNextEventCycle;
extern s32 iopBreak; // used when the IOP execution is broken and control returned to the EE
extern s32 iopCycleEE; // tracks IOP's current sych status with the EE
#ifndef _PC_
@ -172,7 +172,7 @@ extern u32 EEoCycle;
extern s32 psxNextCounter;
extern u32 psxNextsCounter;
extern bool iopBranchAction;
extern bool iopEventAction;
extern bool iopEventTestIsActive;
// Branching status used when throwing exceptions.
@ -196,7 +196,7 @@ extern R3000Acpu psxRec;
extern void psxReset();
extern void __fastcall psxException(u32 code, u32 step);
extern void psxBranchTest();
extern void iopEventTest();
extern void psxMemReset();
// Subsets

View File

@ -133,7 +133,7 @@ static __fi void execI()
psxRegs.pc+= 4;
psxRegs.cycle++;
psxCycleEE-=8;
iopCycleEE-=8;
psxBSC[psxRegs.code >> 26]();
}
@ -147,7 +147,7 @@ static void doBranch(s32 tar) {
iopIsDelaySlot = false;
psxRegs.pc = branchPC;
psxBranchTest();
iopEventTest();
}
static void intAlloc() {
@ -162,16 +162,16 @@ static void intExecute() {
static s32 intExecuteBlock( s32 eeCycles )
{
psxBreak = 0;
psxCycleEE = eeCycles;
iopBreak = 0;
iopCycleEE = eeCycles;
while (psxCycleEE > 0){
while (iopCycleEE > 0){
branch2 = 0;
while (!branch2) {
execI();
}
}
return psxBreak + psxCycleEE;
return iopBreak + iopCycleEE;
}
static void intClear(u32 Addr, u32 Size) {

View File

@ -71,7 +71,7 @@ void cpuReset()
fpuRegs.fprc[0] = 0x00002e00; // fpu Revision..
fpuRegs.fprc[31] = 0x01000001; // fpu Status/Control
g_nextBranchCycle = cpuRegs.cycle + 4;
g_nextEventCycle = cpuRegs.cycle + 4;
EEsCycle = 0;
EEoCycle = cpuRegs.cycle;
@ -236,21 +236,21 @@ void cpuTestMissingHwInts() {
}
// sets a branch test to occur some time from an arbitrary starting point.
__fi void cpuSetNextBranch( u32 startCycle, s32 delta )
__fi void cpuSetNextEvent( u32 startCycle, s32 delta )
{
// typecast the conditional to signed so that things don't blow up
// if startCycle is greater than our next branch cycle.
if( (int)(g_nextBranchCycle - startCycle) > delta )
if( (int)(g_nextEventCycle - startCycle) > delta )
{
g_nextBranchCycle = startCycle + delta;
g_nextEventCycle = startCycle + delta;
}
}
// sets a branch to occur some time from the current cycle
__fi void cpuSetNextBranchDelta( s32 delta )
__fi void cpuSetNextEventDelta( s32 delta )
{
cpuSetNextBranch( cpuRegs.cycle, delta );
cpuSetNextEvent( cpuRegs.cycle, delta );
}
// tests the cpu cycle against the given start and delta values.
@ -264,9 +264,9 @@ __fi int cpuTestCycle( u32 startCycle, s32 delta )
}
// tells the EE to run the branch test the next time it gets a chance.
__fi void cpuSetBranch()
__fi void cpuSetEvent()
{
g_nextBranchCycle = cpuRegs.cycle;
g_nextEventCycle = cpuRegs.cycle;
}
__fi void cpuClearInt( uint i )
@ -285,7 +285,7 @@ static __fi void TESTINT( u8 n, void (*callback)() )
callback();
}
else
cpuSetNextBranch( cpuRegs.sCycle[n], cpuRegs.eCycle[n] );
cpuSetNextEvent( cpuRegs.sCycle[n], cpuRegs.eCycle[n] );
}
// [TODO] move this function to LegacyDmac.cpp, and remove most of the DMAC-related headers from
@ -330,7 +330,7 @@ static __fi void _cpuTestTIMR()
s_iLastCOP0Cycle = cpuRegs.cycle;
// fixme: this looks like a hack to make up for the fact that the TIMR
// doesn't yet have a proper mechanism for setting itself up on a nextBranchCycle.
// doesn't yet have a proper mechanism for setting itself up on a nextEventCycle.
// A proper fix would schedule the TIMR to trigger at a specific cycle anytime
// the Count or Compare registers are modified.
@ -365,15 +365,15 @@ static bool cpuIntsEnabled(int Interrupt)
!cpuRegs.CP0.n.Status.b.EXL && (cpuRegs.CP0.n.Status.b.ERL == 0) && IntType;
}
// if cpuRegs.cycle is greater than this cycle, should check cpuBranchTest for updates
u32 g_nextBranchCycle = 0;
// if cpuRegs.cycle is greater than this cycle, should check cpuEventTest for updates
u32 g_nextEventCycle = 0;
// Shared portion of the branch test, called from both the Interpreter
// and the recompiler. (moved here to help alleviate redundant code)
__fi void _cpuBranchTest_Shared()
__fi void _cpuEventTest_Shared()
{
ScopedBool etest(eeEventTestIsActive);
g_nextBranchCycle = cpuRegs.cycle + eeWaitCycles;
g_nextEventCycle = cpuRegs.cycle + eeWaitCycles;
// ---- Counters -------------
// Important: the vsync counter must be the first to be checked. It includes emulation
@ -397,23 +397,23 @@ __fi void _cpuBranchTest_Shared()
_cpuTestInterrupts();
// ---- IOP -------------
// * It's important to run a psxBranchTest before calling ExecuteBlock. This
// * It's important to run an iopEventTest before calling ExecuteBlock. This
// is because the IOP does not always perform branch tests before returning
// (during the prev branch) and also so it can act on the state the EE has
// given it before executing any code.
//
// * The IOP cannot always be run. If we run IOP code every time through the
// cpuBranchTest, the IOP generally starts to run way ahead of the EE.
// cpuEventTest, the IOP generally starts to run way ahead of the EE.
EEsCycle += cpuRegs.cycle - EEoCycle;
EEoCycle = cpuRegs.cycle;
if( EEsCycle > 0 )
iopBranchAction = true;
iopEventAction = true;
psxBranchTest();
iopEventTest();
if( iopBranchAction )
if( iopEventAction )
{
//if( EEsCycle < -450 )
// Console.WriteLn( " IOP ahead by: %d cycles", -EEsCycle );
@ -424,34 +424,11 @@ __fi void _cpuBranchTest_Shared()
// run closely in sync during raised exception events. But in practice it didn't
// seem to make much of a difference.
// Note: The IOP is very good about chaining blocks together so it tends to
// run lots of cycles, even with only 32 (4 IOP) cycles specified here. That's
// probably why it doesn't improve sync much.
/*bool eeExceptPending = cpuIntsEnabled() &&
//( cpuRegs.CP0.n.Status.b.EIE && cpuRegs.CP0.n.Status.b.IE && (cpuRegs.CP0.n.Status.b.ERL == 0) ) &&
//( (cpuRegs.CP0.n.Status.val & 0x10007) == 0x10001 ) &&
( (cpuRegs.interrupt & (3<<30)) != 0 );
if( eeExceptPending )
{
// ExecuteBlock returns a negative value, so subtract it from the cycle count
// specified to get the total cycles processed! :D
int cycleCount = std::min( EEsCycle, (s32)(eeWaitCycles>>4) );
int cyclesRun = cycleCount - psxCpu->ExecuteBlock( cycleCount );
EEsCycle -= cyclesRun;
//Console.Warning( "IOP Exception-Pending Execution -- EEsCycle: %d", EEsCycle );
}
else*/
{
EEsCycle = psxCpu->ExecuteBlock( EEsCycle );
}
iopBranchAction = false;
iopEventAction = false;
}
// ---- VU0 -------------
// We're in a BranchTest. All dynarec registers are flushed
// We're in an EventTest. All dynarec registers are flushed
// so there is no need to freeze registers here.
CpuVU0->ExecuteBlock();
@ -466,19 +443,19 @@ __fi void _cpuBranchTest_Shared()
// EE's running way ahead of the IOP still, so we should branch quickly to give the
// IOP extra timeslices in short order.
cpuSetNextBranchDelta( 48 );
//Console.Warning( "EE ahead of the IOP -- Rapid Branch! %d", EEsCycle );
cpuSetNextEventDelta( 48 );
//Console.Warning( "EE ahead of the IOP -- Rapid Event! %d", EEsCycle );
}
// The IOP could be running ahead/behind of us, so adjust the iop's next branch by its
// relative position to the EE (via EEsCycle)
cpuSetNextBranchDelta( ((g_psxNextBranchCycle-psxRegs.cycle)*8) - EEsCycle );
cpuSetNextEventDelta( ((g_iopNextEventCycle-psxRegs.cycle)*8) - EEsCycle );
// Apply the hsync counter's nextCycle
cpuSetNextBranch( hsyncCounter.sCycle, hsyncCounter.CycleT );
cpuSetNextEvent( hsyncCounter.sCycle, hsyncCounter.CycleT );
// Apply vsync and other counter nextCycles
cpuSetNextBranch( nextsCounter, nextCounter );
cpuSetNextEvent( nextsCounter, nextCounter );
// ---- INTC / DMAC Exceptions -----------------
// Raise the INTC and DMAC interrupts here, which usually throw exceptions.
@ -501,15 +478,11 @@ __ri void cpuTestINTCInts()
if( (psHu32(INTC_STAT) & psHu32(INTC_MASK)) == 0 ) return;
cpuRegs.interrupt|= 1 << 30;
cpuRegs.sCycle[30] = cpuRegs.cycle;
cpuRegs.eCycle[30] = 4; //Needs to be 4 to account for bus delays/pipelines etc
cpuSetNextBranchDelta( 4 );
if(eeEventTestIsActive && (psxCycleEE > 0))
cpuSetNextEventDelta( 4 );
if(eeEventTestIsActive && (iopCycleEE > 0))
{
psxBreak += psxCycleEE; // record the number of cycles the IOP didn't run.
psxCycleEE = 0;
iopBreak += iopCycleEE; // record the number of cycles the IOP didn't run.
iopCycleEE = 0;
}
}
@ -525,15 +498,11 @@ __fi void cpuTestDMACInts()
if ( ( (psHu16(0xe012) & psHu16(0xe010)) == 0) &&
( (psHu16(0xe010) & 0x8000) == 0) ) return;
cpuRegs.interrupt|= 1 << 31;
cpuRegs.sCycle[31] = cpuRegs.cycle;
cpuRegs.eCycle[31] = 4; //Needs to be 4 to account for bus delays/pipelines etc
cpuSetNextBranchDelta( 4 );
if(eeEventTestIsActive && (psxCycleEE > 0))
cpuSetNextEventDelta( 4 );
if(eeEventTestIsActive && (iopCycleEE > 0))
{
psxBreak += psxCycleEE; // record the number of cycles the IOP didn't run.
psxCycleEE = 0;
iopBreak += iopCycleEE; // record the number of cycles the IOP didn't run.
iopCycleEE = 0;
}
}
@ -567,16 +536,16 @@ __fi void CPU_INT( EE_EventType n, s32 ecycle)
// Interrupt is happening soon: make sure both EE and IOP are aware.
if( ecycle <= 28 && psxCycleEE > 0 )
if( ecycle <= 28 && iopCycleEE > 0 )
{
// If running in the IOP, force it to break immediately into the EE.
// the EE's branch test is due to run.
psxBreak += psxCycleEE; // record the number of cycles the IOP didn't run.
psxCycleEE = 0;
iopBreak += iopCycleEE; // record the number of cycles the IOP didn't run.
iopCycleEE = 0;
}
cpuSetNextBranchDelta( cpuRegs.eCycle[n] );
cpuSetNextEventDelta( cpuRegs.eCycle[n] );
}
// Called from recompilers; __fastcall define is mandatory.

View File

@ -244,7 +244,7 @@ extern __aligned16 cpuRegisters cpuRegs;
extern __aligned16 fpuRegisters fpuRegs;
extern __aligned16 tlbs tlb[48];
extern u32 g_nextBranchCycle;
extern u32 g_nextEventCycle;
extern bool eeEventTestIsActive;
extern u32 s_iLastCOP0Cycle;
extern u32 s_iLastPERFCycle[2];
@ -415,12 +415,12 @@ extern void cpuTlbMissW(u32 addr, u32 bd);
extern void cpuTestHwInts();
extern void cpuClearInt(uint n);
extern void cpuSetNextBranch( u32 startCycle, s32 delta );
extern void cpuSetNextBranchDelta( s32 delta );
extern void cpuSetNextEvent( u32 startCycle, s32 delta );
extern void cpuSetNextEventDelta( s32 delta );
extern int cpuTestCycle( u32 startCycle, s32 delta );
extern void cpuSetBranch();
extern void cpuSetEvent();
extern void _cpuBranchTest_Shared(); // for internal use by the Dynarecs and Ints inside R5900:
extern void _cpuEventTest_Shared(); // for internal use by the Dynarecs and Ints inside R5900:
extern void cpuTestINTCInts();
extern void cpuTestDMACInts();

View File

@ -179,8 +179,8 @@ void SaveStateBase::FreezeRegisters()
FreezeTag( "Cycles" );
Freeze(EEsCycle);
Freeze(EEoCycle);
Freeze(g_nextBranchCycle);
Freeze(g_psxNextBranchCycle);
Freeze(g_nextEventCycle);
Freeze(g_iopNextEventCycle);
Freeze(s_iLastCOP0Cycle);
Freeze(s_iLastPERFCycle);

View File

@ -24,7 +24,7 @@
// the lower 16 bit value. IF the change is breaking of all compatibility with old
// states, increment the upper 16 bit value, and clear the lower 16 bits to 0.
static const u32 g_SaveVersion = 0x8b490000;
static const u32 g_SaveVersion = 0x8b4a0000;
// this function is meant to be used in the place of GSfreeze, and provides a safe layer
// between the GS saving function and the MTGS's needs. :)

View File

@ -33,7 +33,7 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) {
// Let VUs run behind EE instead of ahead
if (stat & test) {
cpuSetNextBranchDelta((s+c)*2);
cpuSetNextEventDelta((s+c)*2);
m_lastEEcycles = cpuRegs.cycle + (s*2);
}
}
@ -43,11 +43,11 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) {
delta >>= 1; // Divide by 2 (unsigned)
Execute(delta); // Execute the time since the last call
if (stat & test) {
cpuSetNextBranchDelta(c*2);
cpuSetNextEventDelta(c*2);
m_lastEEcycles = cpuRegs.cycle;
}
}
else cpuSetNextBranchDelta(-delta); // Haven't caught-up from kick start
else cpuSetNextEventDelta(-delta); // Haven't caught-up from kick start
}
}
@ -63,7 +63,7 @@ void __fastcall BaseVUmicroCPU::ExecuteBlockJIT(BaseVUmicroCPU* cpu) {
cpu->Execute(c); // Execute VU
if (stat & test) {
cpu->m_lastEEcycles+=(c*2);
cpuSetNextBranchDelta(c*2);
cpuSetNextEventDelta(c*2);
}
}
}
@ -80,7 +80,7 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) {
// If the VU0 program didn't finish then we'll want to finish it up
// pretty soon. This fixes vmhacks in some games (Naruto Ultimate Ninja 2)
if(VU0.VI[REG_VPU_STAT].UL & vuRunning)
cpuSetNextBranchDelta( 192 ); // fixme : ideally this should be higher, like 512 or so.
cpuSetNextEventDelta( 192 ); // fixme : ideally this should be higher, like 512 or so.
}
else {
Execute(vu0RunCycles);
@ -89,7 +89,7 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) {
// This helps keep the EE and VU0 in sync.
// Check Silver Surfer. Currently has SPS varying with different branch deltas set below.
if(VU0.VI[REG_VPU_STAT].UL & vuRunning)
cpuSetNextBranchDelta( 768 );
cpuSetNextEventDelta( 768 );
}
}

View File

@ -21,8 +21,8 @@
#include "GS.h"
#include "Gif.h"
vifStruct vif0;
vifStruct vif1;
__aligned16 vifStruct vif0, vif1;
tGSTransferStatus GSTransferStatus((STOPPED_MODE<<8) | (STOPPED_MODE<<4) | STOPPED_MODE);
void vif0Reset()
@ -31,14 +31,6 @@ void vif0Reset()
memzero(vif0);
memzero(vif0Regs);
psHu64(VIF0_FIFO) = 0;
psHu64(VIF0_FIFO + 8) = 0;
vif0Regs.stat.VPS = VPS_IDLE;
vif0Regs.stat.FQC = 0;
vif0.done = false;
resetNewVif(0);
}
@ -48,15 +40,6 @@ void vif1Reset()
memzero(vif1);
memzero(vif1Regs);
psHu64(VIF1_FIFO) = 0;
psHu64(VIF1_FIFO + 8) = 0;
vif1Regs.stat.VPS = VPS_IDLE;
vif1Regs.stat.FQC = 0; // FQC=0
vif1.done = false;
cpuRegs.interrupt &= ~((1 << 1) | (1 << 10)); //Stop all vif1 DMA's
resetNewVif(1);
}
@ -64,7 +47,6 @@ void SaveStateBase::vif0Freeze()
{
FreezeTag("VIFdma");
Freeze(g_vifCycles); // Dunno if this one is needed, but whatever, it's small. :)
Freeze(g_vifmask); // mask settings for VIF0 and VIF1
Freeze(vif0);
Freeze(nVif[0].bSize);
@ -153,6 +135,7 @@ __fi void vif1FBRST(u32 value) {
if (FBRST(value).RST) // Reset Vif.
{
memzero(vif1);
//cpuRegs.interrupt &= ~((1 << 1) | (1 << 10)); //Stop all vif1 DMA's
vif1ch.qwc -= min((int)vif1ch.qwc, 16); //?
psHu64(VIF1_FIFO) = 0;
@ -277,9 +260,29 @@ __fi void vif1STAT(u32 value) {
#define caseVif(x) (idx ? VIF1_##x : VIF0_##x)
// Reads a 32-bit VIF register at address `mem` for the VIF unit selected by
// the template index `idx` (explicitly instantiated for 0 and 1).
//
// The mask ROW0-ROW3 and COL0-COL3 registers are served from the vifStruct's
// MaskRow / MaskCol members; any other address is read through psHu32(mem).
_vifT __fi u32 vifRead32(u32 mem) {
vifStruct& vif = GetVifX; // NOTE(review): presumably resolves to vif0 or vif1 based on idx -- confirm
switch (mem) {
// caseVif(x) expands to VIF1_x when idx is non-zero and VIF0_x otherwise.
case caseVif(ROW0): return vif.MaskRow._u32[0];
case caseVif(ROW1): return vif.MaskRow._u32[1];
case caseVif(ROW2): return vif.MaskRow._u32[2];
case caseVif(ROW3): return vif.MaskRow._u32[3];
case caseVif(COL0): return vif.MaskCol._u32[0];
case caseVif(COL1): return vif.MaskCol._u32[1];
case caseVif(COL2): return vif.MaskCol._u32[2];
case caseVif(COL3): return vif.MaskCol._u32[3];
}
// Not a mask row/column register: return whatever psHu32(mem) yields.
return psHu32(mem);
}
// returns FALSE if no writeback is needed (or writeback is handled internally)
// returns TRUE if the caller should writeback the value to the eeHw register map.
_vifT __fi bool vifWrite32(u32 mem, u32 value) {
vifStruct& vif = GetVifX;
switch (mem) {
case caseVif(MARK):
VIF_LOG("VIF%d_MARK write32 0x%8.8x", idx, value);
@ -303,33 +306,23 @@ _vifT __fi bool vifWrite32(u32 mem, u32 value) {
// standard register writes -- handled by caller.
break;
case caseVif(ROW0):
case caseVif(ROW1):
case caseVif(ROW2):
case caseVif(ROW3):
// Here's a neat way to obfuscate code. This is a super-fancy-complicated version
// of a standard psHu32(mem) = value; writeback. Handled by caller for us, thanks! --air
//if (!idx) g_vifmask.Row0[ (mem>>4)&3 ] = value;
//else g_vifmask.Row1[ (mem>>4)&3 ] = value;
//((u32*)&vifXRegs.r0) [((mem>>4)&3)*4] = value;
break;
case caseVif(ROW0): vif.MaskRow._u32[0] = value; return false;
case caseVif(ROW1): vif.MaskRow._u32[1] = value; return false;
case caseVif(ROW2): vif.MaskRow._u32[2] = value; return false;
case caseVif(ROW3): vif.MaskRow._u32[3] = value; return false;
case caseVif(COL0):
case caseVif(COL1):
case caseVif(COL2):
case caseVif(COL3):
// Here's a neat way to obfuscate code. This is a super-fancy-complicated version
// of a standard psHu32(mem) = value; writeback. Handled by caller for us, thanks! --air
//if (!idx) g_vifmask.Col0[ (mem>>4)&3 ] = value;
//else g_vifmask.Col1[ (mem>>4)&3 ] = value;
//((u32*)&vifXRegs.c0) [((mem>>4)&3)*4] = value;
break;
case caseVif(COL0): vif.MaskCol._u32[0] = value; return false;
case caseVif(COL1): vif.MaskCol._u32[1] = value; return false;
case caseVif(COL2): vif.MaskCol._u32[2] = value; return false;
case caseVif(COL3): vif.MaskCol._u32[3] = value; return false;
}
// fall-through case: issue standard writeback behavior.
return true;
}
// Explicit instantiations of the register accessors for idx = 0 and idx = 1.
// NOTE(review): presumably needed because the template bodies are defined in this source
// file rather than in a header -- confirm against the declarations in the VIF headers.
template u32 vifRead32<0>(u32 mem);
template u32 vifRead32<1>(u32 mem);
template bool vifWrite32<0>(u32 mem, u32 value);
template bool vifWrite32<1>(u32 mem, u32 value);

View File

@ -213,8 +213,6 @@ struct VIFregisters {
u32 addr;
};
extern VIFregisters *vifRegs;
static VIFregisters& vif0Regs = (VIFregisters&)eeHw[0x3800];
static VIFregisters& vif1Regs = (VIFregisters&)eeHw[0x3C00];

View File

@ -58,7 +58,7 @@ __fi void vif1FLUSH()
void vif1TransferToMemory()
{
u32 size;
u64* pMem = (u64*)dmaGetAddr(vif1ch.madr, false);
u128* pMem = (u128*)dmaGetAddr(vif1ch.madr, false);
// VIF from gsMemory
if (pMem == NULL) //Is vif0ptag empty?
@ -78,54 +78,34 @@ void vif1TransferToMemory()
// completely and execute the transfer there-after.
//Console.Warning("Real QWC %x", vif1ch.qwc);
size = min((u32)vif1ch.qwc, vif1.GSLastDownloadSize);
const u128* pMemEnd = pMem + vif1.GSLastDownloadSize;
if (GSreadFIFO2 == NULL)
{
for (;size > 0; --size)
{
GetMTGS().WaitGS();
GSreadFIFO(&psHu64(VIF1_FIFO));
pMem[0] = psHu64(VIF1_FIFO);
pMem[1] = psHu64(VIF1_FIFO + 8);
pMem += 2;
}
if(vif1ch.qwc > vif1.GSLastDownloadSize)
{
DevCon.Warning("GS Transfer < VIF QWC, Clearing end of space");
for (size = vif1ch.qwc - vif1.GSLastDownloadSize; size > 0; --size)
{
psHu64(VIF1_FIFO) = 0;
psHu64(VIF1_FIFO + 8) = 0;
pMem[0] = psHu64(VIF1_FIFO);
pMem[1] = psHu64(VIF1_FIFO + 8);
pMem += 2;
}
GSreadFIFO((u64*)pMem);
++pMem;
}
}
else
{
GetMTGS().WaitGS();
GSreadFIFO2(pMem, size);
// set incase read
psHu64(VIF1_FIFO) = pMem[2*size-2];
psHu64(VIF1_FIFO + 8) = pMem[2*size-1];
pMem += size * 2;
if(vif1ch.qwc > vif1.GSLastDownloadSize)
{
DevCon.Warning("GS Transfer < VIF QWC, Clearing end of space");
for (size = vif1ch.qwc - vif1.GSLastDownloadSize; size > 0; --size)
{
psHu64(VIF1_FIFO) = 0;
psHu64(VIF1_FIFO + 8) = 0;
pMem[0] = psHu64(VIF1_FIFO);
pMem[1] = psHu64(VIF1_FIFO + 8);
pMem += 2;
}
}
GSreadFIFO2((u64*)pMem, size);
pMem += size;
}
if(pMem < pMemEnd)
{
DevCon.Warning("GS Transfer < VIF QWC, Clearing end of space");
__m128 zeroreg = _mm_setzero_ps();
do {
_mm_store_ps((float*)pMem, zeroreg);
++pMem;
} while (pMem < pMemEnd);
}
g_vifCycles += vif1ch.qwc * 2;
vif1ch.madr += vif1ch.qwc * 16; // mgs3 scene changes

View File

@ -19,16 +19,12 @@
#include "Gif.h"
#include "Vif_Dma.h"
VIFregisters *vifRegs;
vifStruct *vif;
u16 vifqwc = 0;
u32 g_vifCycles = 0;
u32 g_vu0Cycles = 0;
u32 g_vu1Cycles = 0;
u32 g_packetsizeonvu = 0;
__aligned16 VifMaskTypes g_vifmask;
extern u32 g_vifCycles;
static u32 qwctag(u32 mask)

View File

@ -38,8 +38,7 @@ static __fi void vifFlush(int idx) {
}
static __fi void vuExecMicro(int idx, u32 addr) {
VURegs* VU = nVif[idx].VU;
VIFregisters& vifRegs = VU->GetVifRegs();
VIFregisters& vifRegs = vifXRegs;
int startcycles = 0;
//vifFlush(idx);
@ -423,7 +422,7 @@ vifOp(vifCode_Offset) {
return 0;
}
template<int idx> static __fi int _vifCode_STColRow(const u32* data, u32* pmem1, u32* pmem2) {
template<int idx> static __fi int _vifCode_STColRow(const u32* data, u32* pmem2) {
vifStruct& vifX = GetVifX;
int ret = min(4 - vifX.tag.addr, vifX.vifpacketsize);
@ -432,16 +431,12 @@ template<int idx> static __fi int _vifCode_STColRow(const u32* data, u32* pmem1,
switch (ret) {
case 4:
pmem1[12] = data[3];
pmem2[3] = data[3];
case 3:
pmem1[8] = data[2];
pmem2[2] = data[2];
case 2:
pmem1[4] = data[1];
pmem2[1] = data[1];
case 1:
pmem1[0] = data[0];
pmem2[0] = data[0];
break;
jNO_DEFAULT
@ -462,10 +457,7 @@ vifOp(vifCode_STCol) {
return 1;
}
pass2 {
u32* cols = idx ? g_vifmask.Col1 : g_vifmask.Col0;
u32* pmem1 = &vifXRegs.c0 + (vifX.tag.addr << 2);
u32* pmem2 = cols + vifX.tag.addr;
return _vifCode_STColRow<idx>(data, pmem1, pmem2);
return _vifCode_STColRow<idx>(data, &vifX.MaskCol._u32[vifX.tag.addr]);
}
pass3 { VifCodeLog("STCol"); }
return 0;
@ -480,10 +472,7 @@ vifOp(vifCode_STRow) {
return 1;
}
pass2 {
u32* rows = idx ? g_vifmask.Row1 : g_vifmask.Row0;
u32* pmem1 = &vifXRegs.r0 + (vifX.tag.addr << 2);
u32* pmem2 = rows + vifX.tag.addr;
return _vifCode_STColRow<idx>(data, pmem1, pmem2);
return _vifCode_STColRow<idx>(data, &vifX.MaskRow._u32[vifX.tag.addr]);
}
pass3 { VifCodeLog("STRow"); }
return 0;
@ -516,11 +505,10 @@ vifOp(vifCode_STMod) {
vifOp(vifCode_Unpack) {
pass1 {
if (!idx) vifUnpackSetup<0>(data);
else vifUnpackSetup<1>(data);
vifUnpackSetup<idx>(data);
return 1;
}
pass2 { return nVifUnpack(idx, (u8*)data); }
pass2 { return nVifUnpack<idx>((u8*)data); }
pass3 { VifCodeLog("Unpack"); }
return 0;
}

View File

@ -56,6 +56,8 @@ union tTRXREG {
// NOTE, if debugging vif stalls, use sega classics, spyro, gt4, and taito
struct vifStruct {
u128 MaskRow, MaskCol;
vifCode tag;
int cmd;
int irq;
@ -82,10 +84,10 @@ struct vifStruct {
u8 GifWaitState; // 0 = General PATH checking, 1 = Flush path 3, 2 == Wait for VU1
};
extern vifStruct* vif;
extern vifStruct vif0, vif1;
extern __aligned16 vifStruct vif0, vif1;
extern u8 schedulepath3msk;
_vifT extern u32 vifRead32(u32 mem);
_vifT extern bool vifWrite32(u32 mem, u32 value);
extern void vif0Interrupt();
@ -122,15 +124,3 @@ extern u32 g_vu1Cycles;
extern u32 g_packetsizeonvu;
extern void vif0FLUSH();
extern void vif1FLUSH();
//------------------------------------------------------------------
// newVif SSE-optimized Row/Col Structs
//------------------------------------------------------------------
struct VifMaskTypes
{
u32 Row0[4], Col0[4];
u32 Row1[4], Col1[4];
};
extern __aligned16 VifMaskTypes g_vifmask; // This struct is used by newVif

View File

@ -25,7 +25,7 @@
// Doesn't stall if the next vifCode is the Mark command
_vifT bool runMark(u32* &data) {
if (((vifXRegs.code >> 24) & 0x7f) == 0x7) {
Console.WriteLn("Vif%d: Running Mark with I-bit", idx);
DevCon.WriteLn("Vif%d: Running Mark with I-bit", idx);
return 1; // No Stall?
}
return 1; // Stall

View File

@ -25,212 +25,99 @@ enum UnpackOffset {
OFFSET_W = 3
};
static __fi u32 setVifRowRegs(u32 reg, u32 data) {
switch (reg) {
case 0: vifRegs->r0 = data; break;
case 1: vifRegs->r1 = data; break;
case 2: vifRegs->r2 = data; break;
case 3: vifRegs->r3 = data; break;
jNO_DEFAULT;
}
static __fi u32 setVifRow(vifStruct& vif, u32 reg, u32 data) {
vif.MaskRow._u32[reg] = data;
return data;
}
// Returns row register r0..r3 (selected by `reg`, 0..3) from the register block that the
// global vifRegs pointer currently refers to. Any other `reg` value goes to jNO_DEFAULT.
static __fi u32 getVifRowRegs(u32 reg) {
	u32 rowValue = 0;

	switch (reg) {
		case 0:
			rowValue = vifRegs->r0;
			break;
		case 1:
			rowValue = vifRegs->r1;
			break;
		case 2:
			rowValue = vifRegs->r2;
			break;
		case 3:
			rowValue = vifRegs->r3;
			break;
		jNO_DEFAULT;
	}

	return rowValue;
}
// Returns column register c0..c3 from the register block that the global vifRegs pointer
// currently refers to. Any `reg` other than 0, 1 or 2 yields c3.
static __fi u32 getVifColRegs(u32 reg) {
	if (reg == 0) return vifRegs->c0;
	if (reg == 1) return vifRegs->c1;
	if (reg == 2) return vifRegs->c2;

	// Everything else (including 3) maps to the last column register.
	return vifRegs->c3;
}
template< bool doMask >
// cycle derives from vif.cl
// mode derives from vifRegs.mode
template< uint idx, uint mode, bool doMask >
static __ri void writeXYZW(u32 offnum, u32 &dest, u32 data) {
u32 vifRowReg = getVifRowRegs(offnum);
int n = 0;
vifStruct& vif = GetVifX;
if (doMask) {
switch (vif->cl) {
case 0: n = (vifRegs->mask >> (offnum * 2)) & 0x3; break;
case 1: n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3; break;
case 2: n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3; break;
default: n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3; break;
const VIFregisters& regs = vifXRegs;
switch (vif.cl) {
case 0: n = (regs.mask >> (offnum * 2)) & 0x3; break;
case 1: n = (regs.mask >> ( 8 + (offnum * 2))) & 0x3; break;
case 2: n = (regs.mask >> (16 + (offnum * 2))) & 0x3; break;
default: n = (regs.mask >> (24 + (offnum * 2))) & 0x3; break;
}
}
// Four possible types of masking are handled below:
// 0 - Data
// 1 - MaskRow
// 2 - MaskCol
// 3 - Write protect
switch (n) {
case 0:
if ((vif->cmd & 0x6F) != 0x6f) {
switch (vifRegs->mode) {
case 1: dest = data + vifRowReg; break;
case 2: dest = setVifRowRegs(offnum, vifRowReg + data); break;
default: dest = data; break;
}
switch (mode) {
case 1: dest = data + vif.MaskRow._u32[offnum]; break;
case 2: dest = setVifRow(vif, offnum, vif.MaskRow._u32[offnum] + data); break;
default: dest = data; break;
}
else dest = data; // v4-5 Unpack Mode
break;
case 1: dest = vifRowReg; break;
case 2: dest = getVifColRegs(vif->cl); break;
case 1: dest = vif.MaskRow._u32[offnum]; break;
case 2: dest = vif.MaskCol._u32[min(vif.cl,3)]; break;
case 3: break;
}
}
#define tParam idx,mode,doMask
template < bool doMask, class T >
static __fi void __fastcall UNPACK_S(u32 *dest, const T *data, int size)
template < uint idx, uint mode, bool doMask, class T >
static void __fastcall UNPACK_S(u32* dest, const T* src)
{
u32 data = *src;
//S-# will always be a complete packet, no matter what. So we can skip the offset bits
writeXYZW<doMask>(OFFSET_X, *dest++, *data);
writeXYZW<doMask>(OFFSET_Y, *dest++, *data);
writeXYZW<doMask>(OFFSET_Z, *dest++, *data);
writeXYZW<doMask>(OFFSET_W, *dest , *data);
writeXYZW<tParam>(OFFSET_X, *(dest+0), data);
writeXYZW<tParam>(OFFSET_Y, *(dest+1), data);
writeXYZW<tParam>(OFFSET_Z, *(dest+2), data);
writeXYZW<tParam>(OFFSET_W, *(dest+3), data);
}
template <bool doMask, class T>
static __ri void __fastcall UNPACK_V2(u32 *dest, const T *data, int size)
// The PS2 console actually writes v1v0v1v0 for all V2 unpacks -- the second v1v0 pair
// being officially "indeterminate" but some games very much depend on it.
template < uint idx, uint mode, bool doMask, class T >
static void __fastcall UNPACK_V2(u32* dest, const T* src)
{
if (vifRegs->offset == OFFSET_X)
{
if (size > 0)
{
writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
vifRegs->offset = OFFSET_Y;
size--;
}
}
if (vifRegs->offset == OFFSET_Y)
{
if (size > 0)
{
writeXYZW<doMask>(vifRegs->offset, *dest++, *data);
vifRegs->offset = OFFSET_Z;
size--;
}
}
if (vifRegs->offset == OFFSET_Z)
{
writeXYZW<doMask>(vifRegs->offset, *dest++, *dest-2);
vifRegs->offset = OFFSET_W;
}
if (vifRegs->offset == OFFSET_W)
{
writeXYZW<doMask>(vifRegs->offset, *dest, *data);
vifRegs->offset = OFFSET_X;
}
writeXYZW<tParam>(OFFSET_X, *(dest+0), *(src+0));
writeXYZW<tParam>(OFFSET_Y, *(dest+1), *(src+1));
writeXYZW<tParam>(OFFSET_Z, *(dest+2), *(src+0));
writeXYZW<tParam>(OFFSET_W, *(dest+3), *(src+1));
}
template <bool doMask, class T>
static __ri void __fastcall UNPACK_V3(u32 *dest, const T *data, int size)
// V3 and V4 unpacks both use the V4 unpack logic, even though most of the OFFSET_W fields
// during V3 unpacking end up being overwritten by the next unpack. This is confirmed real
// hardware behavior that games such as Ape Escape 3 depend on.
template < uint idx, uint mode, bool doMask, class T >
static void __fastcall UNPACK_V4(u32* dest, const T* src)
{
if(vifRegs->offset == OFFSET_X)
{
if (size > 0)
{
writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
vifRegs->offset = OFFSET_Y;
size--;
}
}
if(vifRegs->offset == OFFSET_Y)
{
if (size > 0)
{
writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
vifRegs->offset = OFFSET_Z;
size--;
}
}
if(vifRegs->offset == OFFSET_Z)
{
if (size > 0)
{
writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
vifRegs->offset = OFFSET_W;
size--;
}
}
if(vifRegs->offset == OFFSET_W)
{
// V3-# does some bizarre thing with alignment, every 6qw of data the W becomes 0 (strange console!)
// Ape Escape doesn't seem to like it tho (what the hell?) gonna have to investigate
writeXYZW<doMask>(vifRegs->offset, *dest, *data);
vifRegs->offset = OFFSET_X;
}
writeXYZW<tParam>(OFFSET_X, *(dest+0), *(src+0));
writeXYZW<tParam>(OFFSET_Y, *(dest+1), *(src+1));
writeXYZW<tParam>(OFFSET_Z, *(dest+2), *(src+2));
writeXYZW<tParam>(OFFSET_W, *(dest+3), *(src+3));
}
template <bool doMask, class T>
static __fi void __fastcall UNPACK_V4(u32 *dest, const T *data , int size)
// V4_5 unpacks do not support the MODE register, and act as mode==0 always.
template< uint idx, bool doMask >
static void __fastcall UNPACK_V4_5(u32 *dest, const u32* src)
{
while (size > 0)
{
writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
vifRegs->offset++;
size--;
}
u32 data = *src;
if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X;
}
template< bool doMask >
static __ri void __fastcall UNPACK_V4_5(u32 *dest, const u32 *data, int size)
{
//As with S-#, this will always be a complete packet
writeXYZW<doMask>(OFFSET_X, *dest++, ((*data & 0x001f) << 3));
writeXYZW<doMask>(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2));
writeXYZW<doMask>(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7));
writeXYZW<doMask>(OFFSET_W, *dest, ((*data & 0x8000) >> 8));
writeXYZW<idx,0,doMask>(OFFSET_X, *(dest+0), ((data & 0x001f) << 3));
writeXYZW<idx,0,doMask>(OFFSET_Y, *(dest+1), ((data & 0x03e0) >> 2));
writeXYZW<idx,0,doMask>(OFFSET_Z, *(dest+2), ((data & 0x7c00) >> 7));
writeXYZW<idx,0,doMask>(OFFSET_W, *(dest+3), ((data & 0x8000) >> 8));
}
// =====================================================================================================
// Two-argument thunk around UNPACK_S: the `size` template argument is forwarded as the
// callee's runtime size parameter, leaving a (dest, data) signature.
template < bool doMask, int size, class T >
static void __fastcall fUNPACK_S(u32 *dest, const T *data)
{
UNPACK_S<doMask>( dest, data, size );
}
// Two-argument thunk around UNPACK_V2: the `size` template argument is forwarded as the
// callee's runtime size parameter, leaving a (dest, data) signature.
template <bool doMask, int size, class T>
static void __fastcall fUNPACK_V2(u32 *dest, const T *data)
{
UNPACK_V2<doMask>( dest, data, size );
}
// Two-argument thunk around UNPACK_V3: the `size` template argument is forwarded as the
// callee's runtime size parameter, leaving a (dest, data) signature.
template <bool doMask, int size, class T>
static void __fastcall fUNPACK_V3(u32 *dest, const T *data)
{
UNPACK_V3<doMask>( dest, data, size );
}
// Two-argument thunk around UNPACK_V4: the `size` template argument is forwarded as the
// callee's runtime size parameter, leaving a (dest, data) signature.
template <bool doMask, int size, class T>
static void __fastcall fUNPACK_V4(u32 *dest, const T *data)
{
UNPACK_V4<doMask>( dest, data, size );
}
// Two-argument thunk around UNPACK_V4_5. The callee takes a size argument but, per the
// inline note below, ignores it, so a constant 0 is passed.
template< bool doMask >
static void __fastcall fUNPACK_V4_5(u32 *dest, const u32 *data)
{
UNPACK_V4_5<doMask>(dest, data, 0); // size is ignored.
}
// --------------------------------------------------------------------------------------
// Main table for function unpacking.
// --------------------------------------------------------------------------------------
@ -245,53 +132,51 @@ static void __fastcall fUNPACK_V4_5(u32 *dest, const u32 *data)
// to be cast as. --air
//
#define _upk (UNPACKFUNCTYPE)
#define _odd (UNPACKFUNCTYPE_ODD)
#define _unpk_s(bits) (UNPACKFUNCTYPE_S##bits)
#define _odd_s(bits) (UNPACKFUNCTYPE_ODD_S##bits)
#define _unpk_u(bits) (UNPACKFUNCTYPE_U##bits)
#define _odd_u(bits) (UNPACKFUNCTYPE_ODD_U##bits)
#define _upk (UNPACKFUNCTYPE)
#define _unpk(usn, bits) (UNPACKFUNCTYPE_##usn##bits)
// 32-bits versions are unsigned-only!!
#define UnpackFuncPair32( sizefac, vt, doMask ) \
(UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt<doMask, sizefac, u32>, \
(UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt<doMask, sizefac, u32>, \
(UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt<doMask, u32>, \
(UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt<doMask, u32>,
#define UnpackFuncSet( vt, idx, mode, usn, doMask ) \
(UNPACKFUNCTYPE)_unpk(u,32) UNPACK_##vt<idx, mode, doMask, u32>, \
(UNPACKFUNCTYPE)_unpk(usn,16) UNPACK_##vt<idx, mode, doMask, usn##16>, \
(UNPACKFUNCTYPE)_unpk(usn,8) UNPACK_##vt<idx, mode, doMask, usn##8> \
#define UnpackFuncPair( sizefac, vt, bits, doMask ) \
(UNPACKFUNCTYPE)_unpk_u(bits) fUNPACK_##vt<doMask, sizefac, u##bits>, \
(UNPACKFUNCTYPE)_unpk_s(bits) fUNPACK_##vt<doMask, sizefac, s##bits>, \
(UNPACKFUNCTYPE_ODD)_odd_u(bits) UNPACK_##vt<doMask, u##bits>, \
(UNPACKFUNCTYPE_ODD)_odd_s(bits) UNPACK_##vt<doMask, s##bits>,
#define UnpackV4_5set(idx, doMask) \
(UNPACKFUNCTYPE)_unpk(u,32) UNPACK_V4_5<idx, doMask> \
#define UnpackFuncSet( doMask ) \
{ UnpackFuncPair32( 4, S, doMask ) 1, 4, 4, 4 }, /* 0x0 - S-32 */ \
{ UnpackFuncPair ( 4, S, 16, doMask ) 2, 2, 2, 4 }, /* 0x1 - S-16 */ \
{ UnpackFuncPair ( 4, S, 8, doMask ) 4, 1, 1, 4 }, /* 0x2 - S-8 */ \
{ NULL, NULL, NULL, NULL, 0, 0, 0, 0 }, /* 0x3 (NULL) */ \
{ UnpackFuncPair32( 2, V2, doMask ) 24, 4, 8, 2 }, /* 0x4 - V2-32 */ \
{ UnpackFuncPair ( 2, V2, 16, doMask ) 12, 2, 4, 2 }, /* 0x5 - V2-16 */ \
{ UnpackFuncPair ( 2, V2, 8, doMask ) 6, 1, 2, 2 }, /* 0x6 - V2-8 */ \
{ NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0x7 (NULL) */ \
{ UnpackFuncPair32( 3, V3, doMask ) 36, 4, 12, 3 }, /* 0x8 - V3-32 */ \
{ UnpackFuncPair ( 3, V3, 16, doMask ) 18, 2, 6, 3 }, /* 0x9 - V3-16 */ \
{ UnpackFuncPair ( 3, V3, 8, doMask ) 9, 1, 3, 3 }, /* 0xA - V3-8 */ \
{ NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0xB (NULL) */ \
{ UnpackFuncPair32( 4, V4, doMask ) 48, 4, 16, 4 }, /* 0xC - V4-32 */ \
{ UnpackFuncPair ( 4, V4, 16, doMask ) 24, 2, 8, 4 }, /* 0xD - V4-16 */ \
{ UnpackFuncPair ( 4, V4, 8, doMask ) 12, 1, 4, 4 }, /* 0xE - V4-8 */ \
{ /* 0xF - V4-5 */ \
(UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5<doMask>, \
(UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5<doMask>, \
(UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5<doMask>, \
(UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5<doMask>, \
6, 2, 2, 4 },
#define UnpackModeSet(idx, mode) \
UnpackFuncSet( S, idx, mode, s, 0 ), NULL, \
UnpackFuncSet( V2, idx, mode, s, 0 ), NULL, \
UnpackFuncSet( V4, idx, mode, s, 0 ), NULL, \
UnpackFuncSet( V4, idx, mode, s, 0 ), UnpackV4_5set(idx, 0), \
\
UnpackFuncSet( S, idx, mode, s, 1 ), NULL, \
UnpackFuncSet( V2, idx, mode, s, 1 ), NULL, \
UnpackFuncSet( V4, idx, mode, s, 1 ), NULL, \
UnpackFuncSet( V4, idx, mode, s, 1 ), UnpackV4_5set(idx, 1), \
\
UnpackFuncSet( S, idx, mode, u, 0 ), NULL, \
UnpackFuncSet( V2, idx, mode, u, 0 ), NULL, \
UnpackFuncSet( V4, idx, mode, u, 0 ), NULL, \
UnpackFuncSet( V4, idx, mode, u, 0 ), UnpackV4_5set(idx, 0), \
\
UnpackFuncSet( S, idx, mode, u, 1 ), NULL, \
UnpackFuncSet( V2, idx, mode, u, 1 ), NULL, \
UnpackFuncSet( V4, idx, mode, u, 1 ), NULL, \
UnpackFuncSet( V4, idx, mode, u, 1 ), UnpackV4_5set(idx, 1)
const __aligned16 VIFUnpackFuncTable VIFfuncTable[32] =
__aligned16 const UNPACKFUNCTYPE VIFfuncTable[2][3][4 * 4 * 2 * 2] =
{
UnpackFuncSet( false )
UnpackFuncSet( true )
{
{ UnpackModeSet(0,0) },
{ UnpackModeSet(0,1) },
{ UnpackModeSet(0,2) }
},
{
{ UnpackModeSet(1,0) },
{ UnpackModeSet(1,1) },
{ UnpackModeSet(1,2) }
}
};
//----------------------------------------------------------------------------
@ -317,16 +202,23 @@ _vifT void vifUnpackSetup(const u32 *data) {
if (vifNum == 0) vifNum = 256;
vifXRegs.num = vifNum;
// Traditional-style way of calculating the gsize, based on VN/VL parameters.
// Useful when VN/VL are known template params, but currently they are not so we use
// the LUT instead (for now).
//uint vl = vifX.cmd & 0x03;
//uint vn = (vifX.cmd >> 2) & 0x3;
//uint gsize = ((32 >> vl) * (vn+1)) / 8;
const u8& gsize = nVifT[vifX.cmd & 0x0f];
if (vifXRegs.cycle.wl <= vifXRegs.cycle.cl) {
if (!idx) vif0.tag.size = ((vifNum * VIFfuncTable[ vif0.cmd & 0xf ].gsize) + 3) >> 2;
else vif1.tag.size = ((vifNum * VIFfuncTable[ vif1.cmd & 0xf ].gsize) + 3) >> 2;
vifX.tag.size = ((vifNum * gsize) + 3) / 4;
}
else {
int n = vifXRegs.cycle.cl * (vifNum / vifXRegs.cycle.wl) +
_limit(vifNum % vifXRegs.cycle.wl, vifXRegs.cycle.cl);
if (!idx) vif0.tag.size = ((n * VIFfuncTable[ vif0.cmd & 0xf ].gsize) + 3) >> 2;
else vif1.tag.size = ((n * VIFfuncTable[ vif1.cmd & 0xf ].gsize) + 3) >> 2;
vifX.tag.size = ((n * gsize) + 3) >> 2;
}
u32 addr = vifXRegs.code;
@ -337,7 +229,6 @@ _vifT void vifUnpackSetup(const u32 *data) {
vifX.cl = 0;
vifX.tag.cmd = vifX.cmd;
vifXRegs.offset = 0;
}
template void vifUnpackSetup<0>(const u32 *data);

View File

@ -15,44 +15,28 @@
#pragma once
typedef void (__fastcall *UNPACKFUNCTYPE)(u32 *dest, const u32 *data);
typedef void (__fastcall *UNPACKFUNCTYPE_ODD)(u32 *dest, const u32 *data, int size);
typedef int (*UNPACKPARTFUNCTYPESSE)(u32 *dest, const u32 *data, int size);
struct vifStruct;
#define create_unpack_u_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_U##bits)(u32 *dest, const u##bits *data);
#define create_unpack_odd_u_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_ODD_U##bits)(u32 *dest, const u##bits *data, int size);
#define create_unpack_s_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_S##bits)(u32 *dest, const s##bits *data);
#define create_unpack_odd_s_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_ODD_S##bits)(u32 *dest, const s##bits *data, int size);
typedef void (__fastcall *UNPACKFUNCTYPE)(void* dest, const void* src);
#define create_unpack_u_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_u##bits)(u32* dest, const u##bits* src);
#define create_unpack_s_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_s##bits)(u32* dest, const s##bits* src);
#define create_some_unpacks(bits) \
create_unpack_u_type(bits); \
create_unpack_odd_u_type(bits); \
create_unpack_s_type(bits); \
create_unpack_odd_s_type(bits);
create_some_unpacks(32);
create_some_unpacks(16);
create_some_unpacks(8);
struct VIFUnpackFuncTable
{
UNPACKFUNCTYPE funcU;
UNPACKFUNCTYPE funcS;
extern __aligned16 const u8 nVifT[16];
UNPACKFUNCTYPE_ODD oddU; // needed for old-style vif only, remove when old vif is removed.
UNPACKFUNCTYPE_ODD oddS; // needed for old-style vif only, remove when old vif is removed.
// Array sub-dimension order: [vifidx] [mode] (VN * VL * USN * doMask)
extern __aligned16 const UNPACKFUNCTYPE VIFfuncTable[2][3][(4 * 4 * 2 * 2)];
u8 bsize; // currently unused
u8 dsize; // byte size of one channel
u8 gsize; // size of data in bytes used for each write cycle
u8 qsize; // used for unpack parts, num of vectors that
// will be decompressed from data for 1 cycle
};
extern const __aligned16 VIFUnpackFuncTable VIFfuncTable[32];
extern int nVifUnpack (int idx, const u8 *data);
_vifT extern int nVifUnpack (const u8* data);
extern void resetNewVif(int idx);
template< int idx >
extern void vifUnpackSetup(const u32 *data);
extern void vifUnpackSetup(const u32* data);

View File

@ -1,159 +0,0 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2010 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Old Vif Unpack Code
// Only here for testing/reference
// Old-style VIF UNPACK: decodes packed source data into VU memory one vector at a time.
//   VIFdmanum - VIF unit (0 or 1); selects VU0/VU1, the matching register block and the
//               destination memory limit (0x1000 for unit 0, 0x4000 for unit 1).
//   data      - packed source data.
//   v         - vifCode for this unpack; v->addr is the byte offset into VU->Mem and
//               v->cmd selects the VIFfuncTable entry.
//   size      - amount of source data; it is shifted left by 2 before use, so presumably a
//               count of 32-bit words -- TODO confirm against callers.
// Side effects: repoints the global vifRegs / vif pointers at the selected unit, decrements
// vifRegs->num and advances vif->cl as vectors are written.
template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {
//if (!VIFdmanum) DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
VURegs * VU;
u8 *cdata = (u8*)data;
u32 tempsize = 0;
const u32 memlimit = (VIFdmanum == 0) ? 0x1000 : 0x4000;
// Bind the VU and the global register/state pointers to the selected unit.
if (VIFdmanum == 0) {
VU = &VU0;
vifRegs = &vif0Regs;
vif = &vif0;
}
else {
VU = &VU1;
vifRegs = &vif1Regs;
vif = &vif1;
}
u32 *dest = (u32*)(VU->Mem + v->addr);
// Pick the table entry for this unpack type, then the funcU or funcS variant based on vif->usn.
const VIFUnpackFuncTable& ft( VIFfuncTable[ v->cmd & 0x1f ] );
UNPACKFUNCTYPE func = vif->usn ? ft.funcU : ft.funcS;
// From here on `size` is compared against the byte-sized ft.gsize / ft.dsize fields.
size <<= 2;
if (vifRegs->cycle.cl >= vifRegs->cycle.wl) { // skipping write
// Wrap a start address that is already past the end of VU memory.
if (v->addr >= memlimit) {
DevCon.Warning("Overflown at the start");
v->addr &= (memlimit - 1);
dest = (u32*)(VU->Mem + v->addr);
}
size = std::min<u32>(size, vifRegs->num * ft.gsize); //size will always be the same or smaller
// Projected end address: 16 bytes per vector written, plus the (cl - wl) * 16 bytes
// skipped for each completed write cycle.
tempsize = v->addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) *
(vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);
//Sanity Check (memory overflow)
// All three branches below end with tempsize = size and size = 0; they differ only in
// what gets logged.
if (tempsize > memlimit) {
if (((vifRegs->cycle.cl != vifRegs->cycle.wl) &&
((memlimit + (vifRegs->cycle.cl - vifRegs->cycle.wl) * 16) == tempsize))) {
//It's a red herring, so ignore it! SSE unpacks will be much quicker.
DevCon.WriteLn("what!!!!!!!!!");
//tempsize = 0;
tempsize = size;
size = 0;
}
else {
DevCon.Warning("VIF%x Unpack ending %x > %x", VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000);
tempsize = size;
size = 0;
}
}
else {
tempsize = size;
size = 0;
}
if (tempsize) {
// dest is a u32*, so +4 advances one 16-byte vector; at the end of a write cycle the
// pointer additionally skips (cl - wl) vectors.
int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
size = 0;
// v->addr is advanced while unpacking and restored from this copy afterwards.
int addrstart = v->addr;
//if((tempsize >> 2) != v->size) DevCon.Warning("split when size != tagsize");
//DbgCon.WriteLn("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, v->addr);
// Unpack whole vectors while a full group of source data (ft.gsize bytes) remains.
while ((tempsize >= ft.gsize) && (vifRegs->num > 0)) {
if(v->addr >= memlimit) {
DevCon.Warning("Mem limit overflow");
v->addr &= (memlimit - 1);
dest = (u32*)(VU->Mem + v->addr);
}
func(dest, (u32*)cdata);
cdata += ft.gsize;
tempsize -= ft.gsize;
vifRegs->num--;
vif->cl++;
if (vif->cl == vifRegs->cycle.wl) {
dest += incdest;
v->addr +=(incdest * 4);
vif->cl = 0;
}
else {
dest += 4;
v->addr += 16;
}
}
if (v->addr >= memlimit) {
v->addr &=(memlimit - 1);
dest = (u32*)(VU->Mem + v->addr);
}
v->addr = addrstart;
// Any source bytes that did not make up a full group are handed to the partial path below.
if(tempsize > 0) size = tempsize;
}
if (size >= ft.dsize && vifRegs->num > 0) { //Else write what we do have
VIF_LOG("warning, end with size = %d", size);
// unpack one qword
//v->addr += (size / ft.dsize) * 4;
// Partial vector: the "odd" unpack variant receives the number of elements still available.
(vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, size / ft.dsize);
size = 0;
//DbgCon.WriteLn("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, v->addr);
}
}
else { // filling write
// NOTE: the two unbraced ifs guard only the first warning; the second DevCon.Warning
// below runs on every filling write.
if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P
if((u32)(((size / ft.gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num)
DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft.gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl);
DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, v->cmd & 0xf, vif->tag.addr);
while (vifRegs->num > 0) {
if (vif->cl == vifRegs->cycle.wl) {
vif->cl = 0;
}
// unpack one qword
if (vif->cl < vifRegs->cycle.cl) {
// Within the first cl writes of the cycle: consume one group of source data.
if(size < ft.gsize) { DevCon.WriteLn("Out of Filling write data!"); break; }
func(dest, (u32*)cdata);
cdata += ft.gsize;
size -= ft.gsize;
vif->cl++;
vifRegs->num--;
if (vif->cl == vifRegs->cycle.wl) {
vif->cl = 0;
}
}
else {
// Remaining writes of the cycle: func is called again without advancing cdata or
// reducing size, so no new source data is consumed.
func(dest, (u32*)cdata);
v->addr += 16;
vifRegs->num--;
vif->cl++;
}
dest += 4;
if (vifRegs->num == 0) break;
}
}
}

View File

@ -58,10 +58,15 @@ enum GIF_REG
// are modified during the GIFtag unpacking process.
struct GIFTAG
{
u32 NLOOP : 15;
u32 EOP : 1;
u32 _dummy0 : 16;
u16 NLOOP : 15;
u16 EOP : 1;
// Note that the contents of the dummy bits on real hardware are likely used to maintain state
// information regarding tag processing (namely nloop and curreg info, so as to resume partial
// transfers later).
u16 _dummy0 : 16;
u32 _dummy1 : 14;
u32 PRE : 1;
u32 PRIM : 11;
u32 FLG : 2;

View File

@ -203,7 +203,7 @@ template< typename T>
static __ri void IopHwTraceLog( u32 addr, T val, bool mode )
{
if (!IsDevBuild) return;
if (!EmuConfig.Trace.IOP.m_EnableRegisters) return;
if (!EmuConfig.Trace.Enabled || !EmuConfig.Trace.IOP.m_EnableAll || !EmuConfig.Trace.IOP.m_EnableRegisters) return;
FastFormatAscii valStr;
FastFormatAscii labelStr;

View File

@ -243,7 +243,7 @@ template< typename T>
static __ri void eeHwTraceLog( u32 addr, T val, bool mode )
{
if (!IsDevBuild) return;
if (!EmuConfig.Trace.EE.m_EnableRegisters) return;
if (!EmuConfig.Trace.Enabled || !EmuConfig.Trace.EE.m_EnableAll || !EmuConfig.Trace.EE.m_EnableRegisters) return;
FastFormatAscii valStr;
FastFormatAscii labelStr;

View File

@ -831,10 +831,6 @@
RelativePath="..\..\Vif_Unpack.h"
>
</File>
<File
RelativePath="..\..\Vif_Unpack.inl"
>
</File>
<Filter
Name="newVif"
>

View File

@ -39,7 +39,7 @@
using namespace x86Emitter;
extern u32 g_psxNextBranchCycle;
extern u32 g_iopNextEventCycle;
extern void psxBREAK();
u32 g_psxMaxRecMem = 0;
@ -121,7 +121,7 @@ static DynGenFunc* iopExitRecompiledCode = NULL;
static void recEventTest()
{
_cpuBranchTest_Shared();
_cpuEventTest_Shared();
}
// parameters:
@ -876,28 +876,28 @@ static void recExecute()
static __noinline s32 recExecuteBlock( s32 eeCycles )
{
psxBreak = 0;
psxCycleEE = eeCycles;
iopBreak = 0;
iopCycleEE = eeCycles;
// [TODO] recExecuteBlock could be replaced by a direct call to the iopEnterRecompiledCode()
// (by assigning its address to the psxRec structure). But for that to happen, we need
// to move psxBreak/psxCycleEE update code to emitted assembly code. >_< --air
// to move iopBreak/iopCycleEE update code to emitted assembly code. >_< --air
// Likely Disasm, as borrowed from MSVC:
// Entry:
// mov eax,dword ptr [esp+4]
// mov dword ptr [psxBreak (0E88DCCh)],0
// mov dword ptr [psxCycleEE (832A84h)],eax
// mov dword ptr [iopBreak (0E88DCCh)],0
// mov dword ptr [iopCycleEE (832A84h)],eax
// Exit:
// mov ecx,dword ptr [psxBreak (0E88DCCh)]
// mov edx,dword ptr [psxCycleEE (832A84h)]
// mov ecx,dword ptr [iopBreak (0E88DCCh)]
// mov edx,dword ptr [iopCycleEE (832A84h)]
// lea eax,[edx+ecx]
iopEnterRecompiledCode();
return psxBreak + psxCycleEE;
return iopBreak + iopCycleEE;
}
// Returns the offset to the next instruction after any cleared memory
@ -1021,19 +1021,19 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch)
{
xMOV(eax, ptr32[&psxRegs.cycle]);
xMOV(ecx, eax);
xMOV(edx, ptr32[&psxCycleEE]);
xMOV(edx, ptr32[&iopCycleEE]);
xADD(edx, 7);
xSHR(edx, 3);
xADD(eax, edx);
xCMP(eax, ptr32[&g_psxNextBranchCycle]);
xCMOVNS(eax, ptr32[&g_psxNextBranchCycle]);
xCMP(eax, ptr32[&g_iopNextEventCycle]);
xCMOVNS(eax, ptr32[&g_iopNextEventCycle]);
xMOV(ptr32[&psxRegs.cycle], eax);
xSUB(eax, ecx);
xSHL(eax, 3);
xSUB(ptr32[&psxCycleEE], eax);
xSUB(ptr32[&iopCycleEE], eax);
xJLE(iopExitRecompiledCode);
xCALL(psxBranchTest);
xCALL(iopEventTest);
if( newpc != 0xffffffff )
{
@ -1047,15 +1047,15 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch)
xADD(eax, blockCycles);
xMOV(ptr32[&psxRegs.cycle], eax); // update cycles
// jump if psxCycleEE <= 0 (iop's timeslice timed out, so time to return control to the EE)
xSUB(ptr32[&psxCycleEE], blockCycles*8);
// jump if iopCycleEE <= 0 (iop's timeslice timed out, so time to return control to the EE)
xSUB(ptr32[&iopCycleEE], blockCycles*8);
xJLE(iopExitRecompiledCode);
// check if an event is pending
xSUB(eax, ptr32[&g_psxNextBranchCycle]);
xSUB(eax, ptr32[&g_iopNextEventCycle]);
xForwardJS<u8> nointerruptpending;
xCALL(psxBranchTest);
xCALL(iopEventTest);
if( newpc != 0xffffffff ) {
xCMP(ptr32[&psxRegs.pc], newpc);
@ -1098,7 +1098,7 @@ void rpsxSYSCALL()
j8Ptr[0] = JE8(0);
ADD32ItoM((uptr)&psxRegs.cycle, psxScaleBlockCycles() );
SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 );
SUB32ItoM((uptr)&iopCycleEE, psxScaleBlockCycles()*8 );
JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 ));
// jump target for skipping blockCycle updates
@ -1120,7 +1120,7 @@ void rpsxBREAK()
CMP32ItoM((uptr)&psxRegs.pc, psxpc-4);
j8Ptr[0] = JE8(0);
ADD32ItoM((uptr)&psxRegs.cycle, psxScaleBlockCycles() );
SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 );
SUB32ItoM((uptr)&iopCycleEE, psxScaleBlockCycles()*8 );
JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 ));
x86SetJ8(j8Ptr[0]);
@ -1373,7 +1373,7 @@ StartRecomp:
else
{
ADD32ItoM((uptr)&psxRegs.cycle, psxScaleBlockCycles() );
SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 );
SUB32ItoM((uptr)&iopCycleEE, psxScaleBlockCycles()*8 );
}
if (willbranch3 || !psxbranch) {

View File

@ -316,7 +316,7 @@ void recBranchCall( void (*func)() )
// to the current cpu cycle.
MOV32MtoR( EAX, (uptr)&cpuRegs.cycle );
MOV32RtoM( (uptr)&g_nextBranchCycle, EAX );
MOV32RtoM( (uptr)&g_nextEventCycle, EAX );
recCall(func);
branch = 2;
@ -350,7 +350,7 @@ static DynGenFunc* ExitRecompiledCode = NULL;
static void recEventTest()
{
_cpuBranchTest_Shared();
_cpuEventTest_Shared();
}
// parameters:
@ -1111,11 +1111,11 @@ static void iBranchTest(u32 newpc)
// Check the Event scheduler if our "cycle target" has been reached.
// Equiv code to:
// cpuRegs.cycle += blockcycles;
// if( cpuRegs.cycle > g_nextBranchCycle ) { DoEvents(); }
// if( cpuRegs.cycle > g_nextEventCycle ) { DoEvents(); }
if (EmuConfig.Speedhacks.WaitLoop && s_nBlockFF && newpc == s_branchTo)
{
xMOV(eax, ptr32[&g_nextBranchCycle]);
xMOV(eax, ptr32[&g_nextEventCycle]);
xADD(ptr32[&cpuRegs.cycle], eeScaleBlockCycles());
xCMP(eax, ptr32[&cpuRegs.cycle]);
xCMOVS(eax, ptr32[&cpuRegs.cycle]);
@ -1128,7 +1128,7 @@ static void iBranchTest(u32 newpc)
xMOV(eax, ptr[&cpuRegs.cycle]);
xADD(eax, eeScaleBlockCycles());
xMOV(ptr[&cpuRegs.cycle], eax); // update cycles
xSUB(eax, ptr[&g_nextBranchCycle]);
xSUB(eax, ptr[&g_nextEventCycle]);
if (newpc == 0xffffffff)
xJS( DispatcherReg );

View File

@ -32,12 +32,13 @@ typedef void (__fastcall *nVifrecCall)(uptr dest, uptr src);
#include "newVif_HashBucket.h"
extern void mVUmergeRegs(const xRegisterSSE& dest, const xRegisterSSE& src, int xyzw, bool modXYZW = 0);
extern void _nVifUnpack (int idx, const u8 *data, u32 size, bool isFill);
extern void dVifUnpack (int idx, const u8 *data, u32 size, bool isFill);
extern void _nVifUnpack (int idx, const u8* data, uint mode, bool isFill);
extern void dVifReset (int idx);
extern void dVifClose (int idx);
extern void VifUnpackSSE_Init();
_vifT extern void dVifUnpack (const u8* data, bool isFill);
#define VUFT VIFUnpackFuncTable
#define _v0 0
#define _v1 0x55
@ -50,11 +51,6 @@ extern void VifUnpackSSE_Init();
#define xmmRow xmm6
#define xmmTemp xmm7
#ifdef _MSC_VER
# pragma pack(1)
# pragma warning(disable:4996) // 'function': was declared deprecated
#endif
// nVifBlock - Ordered for Hashing; the 'num' field and the lower 6 bits of upkType are
// used as the hash bucket selector.
//
@ -62,30 +58,25 @@ struct __aligned16 nVifBlock {
u8 num; // [00] Num Field
u8 upkType; // [01] Unpack Type [usn*1:mask*1:upk*4]
u8 mode; // [02] Mode Field
u8 scl; // [03] Start Cycle
u8 cl; // [04] CL Field
u8 wl; // [05] WL Field
u32 mask; // [06] Mask Field
u8 padding[2];// [10] through [11]
u8 cl; // [03] CL Field
u32 mask; // [04] Mask Field
u8 wl; // [08] WL Field
u8 padding[3];// [09] through [11]
uptr startPtr; // [12] Start Ptr of RecGen Code
} __packed; // 16 bytes
#ifdef _MSC_VER
# pragma pack()
#endif
}; // 16 bytes
#define _hSize 0x4000 // [usn*1:mask*1:upk*4:num*8] hash...
#define _cmpS (sizeof(nVifBlock) - (4))
#define _tParams nVifBlock, _hSize, _cmpS
struct nVifStruct {
u32 idx; // VIF0 or VIF1
vifStruct* vif; // Vif Struct ptr
VIFregisters* vifRegs; // Vif Regs ptr
VURegs* VU; // VU Regs ptr
u32 vuMemLimit; // Use for fast AND
// Buffer for partial transfers (should always be first to ensure alignment)
// Maximum buffer size is 256 (vifRegs.Num max range) * 16 (quadword)
__aligned16 u8 buffer[256*16];
u32 bSize; // Size of 'buffer'
u32 bPtr;
u8 buffer[_1mb]; // Buffer for partial transfers
u32 idx; // VIF0 or VIF1
u8* recPtr; // Cur Pos to recompile to
u8* recEnd; // 'Safe' End of Rec Cache
BlockBuffer* vifCache; // Block Buffer
@ -103,9 +94,7 @@ struct nVifStruct {
};
extern __aligned16 nVifStruct nVif[2];
extern __aligned16 const u8 nVifT[16];
extern __aligned16 nVifCall nVifUpk[(2*2*16)*4]; // ([USN][Masking][Unpack Type]) [curCycle]
extern __aligned16 u32 nVifMask[3][4][4]; // [MaskNumber][CycleNumber][Vector]
static const bool useOldUnpack = 0; // Use code in newVif_OldUnpack.inl
static const bool newVifDynaRec = 1; // Use code in newVif_Dynarec.inl

View File

@ -58,6 +58,7 @@ VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlo
usn = (vB.upkType>>5) & 1;
doMask = (vB.upkType>>4) & 1;
doMode = vB.mode & 3;
vCL = 0;
}
#define makeMergeMask(x) { \
@ -65,15 +66,15 @@ VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlo
}
__fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const {
const vifStruct& vif = v.idx ? vif1 : vif0;
u32 m0 = vB.mask;
u32 m1 = m0 & 0xaaaaaaaa;
u32 m2 =(~m1>>1) & m0;
u32 m3 = (m1>>1) & ~m0;
u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
u32* col = (v.idx) ? g_vifmask.Col1 : g_vifmask.Col0;
if((m2&&(doMask||isFill))||doMode) { xMOVAPS(xmmRow, ptr32[row]); }
if((m2&&(doMask||isFill))||doMode) { xMOVAPS(xmmRow, ptr128[&vif.MaskRow]); }
if (m3&&(doMask||isFill)) {
xMOVAPS(xmmCol0, ptr32[col]);
xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]);
if ((cS>=2) && (m3&0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
if ((cS>=3) && (m3&0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
if ((cS>=4) && (m3&0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3);
@ -95,8 +96,8 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const {
makeMergeMask(m3);
makeMergeMask(m4);
if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); } // Load Write Protect
if (doMask&&m2) { mergeVectors(regX, xmmRow, t, m2); } // Merge Row
if (doMask&&m3) { mergeVectors(regX, xRegisterSSE(xmmCol0.Id+cc), t, m3); } // Merge Col
if (doMask&&m2) { mergeVectors(regX, xmmRow, t, m2); } // Merge MaskRow
if (doMask&&m3) { mergeVectors(regX, xRegisterSSE(xmmCol0.Id+cc), t, m3); } // Merge MaskCol
if (doMask&&m4) { mergeVectors(regX, xmmTemp, t, m4); } // Merge Write Protect
if (doMode) {
u32 m5 = (~m1>>1) & ~m0;
@ -117,8 +118,7 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const {
}
void VifUnpackSSE_Dynarec::writeBackRow() const {
u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
xMOVAPS(ptr32[row], xmmRow);
xMOVAPS(ptr128[&((v.idx ? vif1 : vif0).MaskRow)], xmmRow);
DevCon.WriteLn("nVif: writing back row reg! [doMode = 2]");
// ToDo: Do we need to write back to vifregs.rX too!? :/
}
@ -139,15 +139,16 @@ static void ShiftDisplacementWindow( xAddressVoid& addr, const xRegister32& modR
}
void VifUnpackSSE_Dynarec::CompileRoutine() {
const int upkNum = v.vif->cmd & 0xf;
const int upkNum = vB.upkType & 0xf;
const u8& vift = nVifT[upkNum];
const int cycleSize = isFill ? vB.cl : vB.wl;
const int blockSize = isFill ? vB.wl : vB.cl;
const int skipSize = blockSize - cycleSize;
uint vNum = vB.num ? vB.num : 256;
doMode = (upkNum == 0xf) ? 0 : doMode; // V4_5 has no mode feature.
int vNum = v.vifRegs->num;
vCL = v.vif->cl;
doMode = upkNum == 0xf ? 0 : doMode;
pxAssume(vCL == 0);
// Value passed determines # of col regs we need to load
SetMasks(isFill ? blockSize : cycleSize);
@ -189,14 +190,17 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
}
if (doMode==2) writeBackRow();
xMOV(ptr32[&v.vif->cl], vCL);
xMOV(ptr32[&v.vifRegs->num], vNum);
xRET();
}
static __noinline u8* dVifsetVUptr(const nVifStruct& v, int cl, int wl, bool isFill) {
u8* startmem = v.VU->Mem + (v.vif->tag.addr & v.vuMemLimit);
u8* endmem = v.VU->Mem + (v.vuMemLimit+0x10);
_vifT static __fi u8* dVifsetVUptr(uint cl, uint wl, bool isFill) {
vifStruct& vif = GetVifX;
VIFregisters& vifRegs = vifXRegs;
const VURegs& VU = vuRegs[idx];
const uint vuMemLimit = idx ? 0x4000 : 0x1000;
u8* startmem = VU.Mem + (vif.tag.addr & (vuMemLimit-0x10));
u8* endmem = VU.Mem + vuMemLimit;
uint length = _vBlock.num * 16;
if (!isFill) {
@ -204,15 +208,15 @@ static __noinline u8* dVifsetVUptr(const nVifStruct& v, int cl, int wl, bool isF
// shouldn't count as wrapped data. Otherwise, a trailing skip can cause the emu to drop back
// to the interpreter. -- Refraction (test with MGS3)
int skipSize = (cl - wl) * 16;
int blocks = _vBlock.num / wl;
uint skipSize = (cl - wl) * 16;
uint blocks = _vBlock.num / wl;
length += (blocks-1) * skipSize;
}
if ( (startmem+length) <= endmem ) {
return startmem;
}
//Console.WriteLn("nVif%x - VU Mem Ptr Overflow; falling back to interpreter. Start = %x End = %x num = %x, wl = %x, cl = %x", v.idx, v.vif->tag.addr, v.vif->tag.addr + (_vBlock.num * 16), _vBlock.num, wl, cl);
//Console.WriteLn("nVif%x - VU Mem Ptr Overflow; falling back to interpreter. Start = %x End = %x num = %x, wl = %x, cl = %x", v.idx, vif.tag.addr, vif.tag.addr + (_vBlock.num * 16), _vBlock.num, wl, cl);
return NULL; // Fall Back to Interpreters which have wrap-around logic
}
@ -227,46 +231,52 @@ static __fi void dVifRecLimit(int idx) {
}
}
// Gcc complains about recursive functions being inlined.
void dVifUnpack(int idx, const u8 *data, u32 size, bool isFill) {
_vifT static __fi bool dVifExecuteUnpack(const u8* data, bool isFill)
{
const nVifStruct& v = nVif[idx];
const u8 upkType = v.vif->cmd & 0x1f | ((!!v.vif->usn) << 5);
const int doMask = v.vif->cmd & 0x10;
const int cycle_cl = v.vifRegs->cycle.cl;
const int cycle_wl = v.vifRegs->cycle.wl;
const int blockSize = isFill ? cycle_wl : cycle_cl;
if (v.vif->cl >= blockSize) v.vif->cl = 0;
_vBlock.upkType = upkType;
_vBlock.num = (u8&)v.vifRegs->num;
_vBlock.mode = (u8&)v.vifRegs->mode;
_vBlock.scl = v.vif->cl;
_vBlock.cl = cycle_cl;
_vBlock.wl = cycle_wl;
// Zero out the mask parameter if it's unused -- games leave random junk
// values here which cause false recblock cache misses.
_vBlock.mask = doMask ? v.vifRegs->mask : 0;
VIFregisters& vifRegs = vifXRegs;
if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
if (u8* dest = dVifsetVUptr(v, cycle_cl, cycle_wl, isFill)) {
if (u8* dest = dVifsetVUptr<idx>(vifRegs.cycle.cl, vifRegs.cycle.wl, isFill)) {
//DevCon.WriteLn("Running Recompiled Block!");
((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
}
else {
//DevCon.WriteLn("Running Interpreter Block");
_nVifUnpack(idx, data, size, isFill);
_nVifUnpack(idx, data, vifRegs.mode, isFill);
}
return;
return true;
}
return false;
}
_vifT __fi void dVifUnpack(const u8* data, bool isFill) {
const nVifStruct& v = nVif[idx];
vifStruct& vif = GetVifX;
VIFregisters& vifRegs = vifXRegs;
const u8 upkType = (vif.cmd & 0x1f) | (vif.usn << 5);
const int doMask = (vif.cmd & 0x10);
_vBlock.upkType = upkType;
_vBlock.num = (u8&)vifRegs.num;
_vBlock.mode = (u8&)vifRegs.mode;
_vBlock.cl = vifRegs.cycle.cl;
_vBlock.wl = vifRegs.cycle.wl;
// Zero out the mask parameter if it's unused -- games leave random junk
// values here which cause false recblock cache misses.
_vBlock.mask = doMask ? vifRegs.mask : 0;
//DevCon.WriteLn("nVif%d: Recompiled Block! [%d]", idx, nVif[idx].numBlocks++);
//DevCon.WriteLn(L"[num=% 3d][upkType=0x%02x][scl=%d][cl=%d][wl=%d][mode=%d][m=%d][mask=%s]",
// _vBlock.num, _vBlock.upkType, _vBlock.scl, _vBlock.cl, _vBlock.wl, _vBlock.mode,
// doMask >> 4, doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
//);
if (dVifExecuteUnpack<idx>(data, isFill)) return;
xSetPtr(v.recPtr);
_vBlock.startPtr = (uptr)xGetAlignedCallTarget();
v.vifBlocks->add(_vBlock);
@ -279,5 +289,8 @@ void dVifUnpack(int idx, const u8 *data, u32 size, bool isFill) {
// Run the block we just compiled. Various conditions may force us to still use
// the interpreter unpacker though, so a recursive call is the safest way here...
dVifUnpack(idx, data, size, isFill);
dVifExecuteUnpack<idx>(data, isFill);
}
template void dVifUnpack<0>(const u8* data, bool isFill);
template void dVifUnpack<1>(const u8* data, bool isFill);

View File

@ -21,12 +21,20 @@
#include "Common.h"
#include "Vif_Dma.h"
#include "newVif.h"
#include "Vif_Unpack.inl"
__aligned16 nVifStruct nVif[2];
__aligned16 nVifCall nVifUpk[(2*2*16) *4]; // ([USN][Masking][Unpack Type]) [curCycle]
__aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Vector]
// Interpreter-style SSE unpacks. Array layout matches the interpreter C unpacks.
// ([USN][Masking][Unpack Type]) [curCycle]
__aligned16 nVifCall nVifUpk[(2*2*16) *4];
// This is used by the interpreted SSE unpacks only. Recompiled SSE unpacks
// and the interpreted C unpacks use the vif.MaskRow/MaskCol members directly.
// [MaskNumber][CycleNumber][Vector]
__aligned16 u32 nVifMask[3][4][4] = {0};
// Number of bytes of data in the source stream needed for each vector.
// [equivalent to ((32 >> VL) * (VN+1)) / 8]
__aligned16 const u8 nVifT[16] = {
4, // S-32
2, // S-16
@ -47,26 +55,18 @@ __aligned16 const u8 nVifT[16] = {
};
// ----------------------------------------------------------------------------
template< int idx, bool doMode, bool isFill, bool singleUnpack >
__ri void __fastcall _nVifUnpackLoop(const u8 *data, u32 size);
template< int idx, bool doMode, bool isFill >
__ri void __fastcall _nVifUnpackLoop(const u8* data);
typedef void __fastcall FnType_VifUnpackLoop(const u8 *data, u32 size);
typedef void __fastcall FnType_VifUnpackLoop(const u8* data);
typedef FnType_VifUnpackLoop* Fnptr_VifUnpackLoop;
// Unpacks Until 'Num' is 0
static const __aligned16 Fnptr_VifUnpackLoop UnpackLoopTable[2][2][2] = {
{{ _nVifUnpackLoop<0,0,0,0>, _nVifUnpackLoop<0,0,1,0> },
{ _nVifUnpackLoop<0,1,0,0>, _nVifUnpackLoop<0,1,1,0> },},
{{ _nVifUnpackLoop<1,0,0,0>, _nVifUnpackLoop<1,0,1,0> },
{ _nVifUnpackLoop<1,1,0,0>, _nVifUnpackLoop<1,1,1,0> },},
};
// Unpacks until 1 normal write cycle unpack has been written to VU mem
static const __aligned16 Fnptr_VifUnpackLoop UnpackSingleTable[2][2][2] = {
{{ _nVifUnpackLoop<0,0,0,1>, _nVifUnpackLoop<0,0,1,1> },
{ _nVifUnpackLoop<0,1,0,1>, _nVifUnpackLoop<0,1,1,1> },},
{{ _nVifUnpackLoop<1,0,0,1>, _nVifUnpackLoop<1,0,1,1> },
{ _nVifUnpackLoop<1,1,0,1>, _nVifUnpackLoop<1,1,1,1> },},
{{ _nVifUnpackLoop<0,0,0>, _nVifUnpackLoop<0,0,1> },
{ _nVifUnpackLoop<0,1,0>, _nVifUnpackLoop<0,1,1> },},
{{ _nVifUnpackLoop<1,0,0>, _nVifUnpackLoop<1,0,1> },
{ _nVifUnpackLoop<1,1,0>, _nVifUnpackLoop<1,1,1> },},
};
// ----------------------------------------------------------------------------
@ -76,10 +76,6 @@ void resetNewVif(int idx)
// changed for some reason.
nVif[idx].idx = idx;
nVif[idx].VU = idx ? &VU1 : &VU0;
nVif[idx].vuMemLimit = idx ? 0x3ff0 : 0xff0;
nVif[idx].vif = &GetVifX;
nVif[idx].vifRegs = &vifXRegs;
nVif[idx].bSize = 0;
memzero(nVif[idx].buffer);
@ -90,65 +86,75 @@ void closeNewVif(int idx) {
if (newVifDynaRec) dVifClose(idx);
}
static __fi u8* setVUptr(int vuidx, const u8* vuMemBase, int offset) {
return (u8*)(vuMemBase + ( offset & (vuidx ? 0x3ff0 : 0xff0) ));
// Translates a VU memory offset into a host pointer inside the memory of the
// VU selected by idx. The offset is first masked with 0x3ff0 when idx is
// non-zero and with 0xff0 otherwise (which also clears its low four bits), then
// added to that VU's memory base.
static __fi u8* getVUptr(uint idx, int offset) {
	const int wrappedOffset = offset & (idx ? 0x3ff0 : 0xff0);
	return (u8*)(vuRegs[idx].Mem + wrappedOffset);
}
static __fi void incVUptr(int vuidx, u8* &ptr, const u8* vuMemBase, int amount) {
pxAssume( ((uptr)ptr & 0xf) == 0 ); // alignment check
ptr += amount;
vif->tag.addr += amount;
int diff = ptr - (vuMemBase + (vuidx ? 0x4000 : 0x1000));
if (diff >= 0) {
ptr = (u8*)(vuMemBase + diff);
}
}
static __fi void incVUptrBy16(int vuidx, u8* &ptr, const u8* vuMemBase) {
pxAssume( ((uptr)ptr & 0xf) == 0 ); // alignment check
ptr += 16;
vif->tag.addr += 16;
if( ptr == (vuMemBase + (vuidx ? 0x4000 : 0x1000)) ) {
ptr -= (vuidx ? 0x4000 : 0x1000);
}
}
int nVifUnpack(int idx, const u8* data) {
_vifT int nVifUnpack(const u8* data) {
nVifStruct& v = nVif[idx];
vif = v.vif;
vifRegs = v.vifRegs;
vifStruct& vif = GetVifX;
VIFregisters& vifRegs = vifXRegs;
const int ret = aMin(vif->vifpacketsize, vif->tag.size);
const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);
const uint ret = aMin(vif.vifpacketsize, vif.tag.size);
const bool isFill = (vifRegs.cycle.cl < vifRegs.cycle.wl);
s32 size = ret << 2;
if (ret == v.vif->tag.size) { // Full Transfer
if (ret == vif.tag.size) { // Full Transfer
if (v.bSize) { // Last transfer was partial
memcpy_fast(&v.buffer[v.bSize], data, size);
v.bSize += size;
data = v.buffer;
size = v.bSize;
v.bSize += size;
data = v.buffer;
vif.cl = 0;
vifRegs.num = (vifXRegs.code >> 16) & 0xff; // grab NUM form the original VIFcode input.
if (!vifRegs.num) vifRegs.num = 256;
}
if (size > 0 || isFill) {
if (newVifDynaRec) dVifUnpack(idx, data, size, isFill);
else _nVifUnpack(idx, data, size, isFill);
}
vif->tag.size = 0;
vif->cmd = 0;
v.bSize = 0;
if (newVifDynaRec) dVifUnpack<idx>(data, isFill);
else _nVifUnpack(idx, data, vifRegs.mode, isFill);
vif.tag.size = 0;
vif.cmd = 0;
vifRegs.num = 0;
v.bSize = 0;
}
else { // Partial Transfer
memcpy_fast(&v.buffer[v.bSize], data, size);
v.bSize += size;
vif->tag.size -= ret;
v.bSize += size;
vif.tag.size -= ret;
const u8& vSize = nVifT[vif.cmd & 0x0f];
// We need to provide accurate accounting of the NUM register, in case games decided
// to read back from it mid-transfer. Since so few games actually use partial transfers
// of VIF unpacks, this code should not be any bottleneck.
while (size >= vSize) {
--vifRegs.num;
++vif.cl;
if (isFill) {
if (vif.cl < vifRegs.cycle.cl) size -= vSize;
else if (vif.cl == vifRegs.cycle.wl) vif.cl = 0;
}
else
{
size -= vSize;
if (vif.cl >= vifRegs.cycle.wl) vif.cl = 0;
}
}
}
return ret;
}
static void setMasks(int idx, const VIFregisters& v) {
u32* row = idx ? g_vifmask.Row1 : g_vifmask.Row0;
u32* col = idx ? g_vifmask.Col1 : g_vifmask.Col0;
template int nVifUnpack<0>(const u8* data);
template int nVifUnpack<1>(const u8* data);
// This is used by the interpreted SSE unpacks only. Recompiled SSE unpacks
// and the interpreted C unpacks use the vif.MaskRow/MaskCol members directly.
static void setMasks(const vifStruct& vif, const VIFregisters& v) {
for (int i = 0; i < 16; i++) {
int m = (v.mask >> (i*2)) & 3;
switch (m) {
@ -157,15 +163,15 @@ static void setMasks(int idx, const VIFregisters& v) {
nVifMask[1][i/4][i%4] = 0;
nVifMask[2][i/4][i%4] = 0;
break;
case 1: // Row
case 1: // MaskRow
nVifMask[0][i/4][i%4] = 0;
nVifMask[1][i/4][i%4] = 0;
nVifMask[2][i/4][i%4] = newVifDynaRec ? row[i%4] : ((u32*)&v.r0)[(i%4)*4];
nVifMask[2][i/4][i%4] = vif.MaskRow._u32[i%4];
break;
case 2: // Col
case 2: // MaskCol
nVifMask[0][i/4][i%4] = 0;
nVifMask[1][i/4][i%4] = 0;
nVifMask[2][i/4][i%4] = newVifDynaRec ? col[i/4] : ((u32*)&v.c0)[(i/4)*4];
nVifMask[2][i/4][i%4] = vif.MaskCol._u32[i/4];
break;
case 3: // Write Protect
nVifMask[0][i/4][i%4] = 0;
@ -184,80 +190,81 @@ static void setMasks(int idx, const VIFregisters& v) {
// a "win" to move code outside the loop, like normally in most other loop scenarios.
//
// The biggest bottleneck of the current code is the call/ret needed to invoke the SSE
// unpackers. A better option is to generate the entire vifRegs->num loop code as part
// unpackers. A better option is to generate the entire vifRegs.num loop code as part
// of the SSE template, and inline the SSE code into the heart of it. This both avoids
// the call/ret and opens the door for resolving some register dependency chains in the
// current emitted functions. (this is what zero's SSE does to get its final bit of
// speed advantage over the new vif). --air
//
// As a secondary optimization to above, special handlers could be generated for the
// cycleSize==1 case, which is used frequently enough, and results in enough code
// elimination that it would probably be a win in most cases (and for sure in many
// "slow" games that need it most). --air
// The BEST optimization strategy here is to use data available to us from the UNPACK dispatch
// -- namely the unpack type and mask flag -- in combination with the mode and usn values -- to
// generate ~600 special versions of this function. But since it's an interpreter, who gives
// a crap? Really? :p
//
template< int idx, bool doMode, bool isFill, bool singleUnpack >
__ri void __fastcall _nVifUnpackLoop(const u8 *data, u32 size) {
// size - size of the packet fragment incoming from DMAC.
template< int idx, bool doMode, bool isFill >
__ri void __fastcall _nVifUnpackLoop(const u8* data) {
const int cycleSize = isFill ? vifRegs->cycle.cl : vifRegs->cycle.wl;
const int blockSize = isFill ? vifRegs->cycle.wl : vifRegs->cycle.cl;
const int skipSize = blockSize - cycleSize;
//DevCon.WriteLn("[%d][%d][%d][num=%d][upk=%d][cl=%d][bl=%d][skip=%d]", isFill, doMask, doMode, vifRegs->num, upkNum, vif->cl, blockSize, skipSize);
vifStruct& vif = GetVifX;
VIFregisters& vifRegs = vifXRegs;
if (vif->cmd & 0x10) setMasks(idx, *vifRegs);
// skipSize used for skipping writes only
const int skipSize = (vifRegs.cycle.cl - vifRegs.cycle.wl) * 16;
const int usn = !!(vif->usn);
const int upkNum = vif->cmd & 0x1f;
//const s8& vift = nVifT[upkNum]; // might be useful later when other SSE paths are finished.
//DevCon.WriteLn("[%d][%d][%d][num=%d][upk=%d][cl=%d][bl=%d][skip=%d]", isFill, doMask, doMode, vifRegs.num, upkNum, vif.cl, blockSize, skipSize);
if (!doMode && (vif.cmd & 0x10)) setMasks(vif, vifRegs);
const int usn = !!vif.usn;
const int upkNum = vif.cmd & 0x1f;
const u8& vSize = nVifT[upkNum & 0x0f];
//uint vl = vif.cmd & 0x03;
//uint vn = (vif.cmd >> 2) & 0x3;
//uint vSize = ((32 >> vl) * (vn+1)) / 8; // size of data (in bytes) used for each write cycle
const nVifCall* fnbase = &nVifUpk[ ((usn*2*16) + upkNum) * (4*1) ];
const VIFUnpackFuncTable& ft = VIFfuncTable[upkNum];
UNPACKFUNCTYPE func = usn ? ft.funcU : ft.funcS;
const UNPACKFUNCTYPE ft = VIFfuncTable[idx][doMode ? vifRegs.mode : 0][ ((usn*2*16) + upkNum) ];
const u8* vuMemBase = (idx ? VU1 : VU0).Mem;
u8* dest = setVUptr(idx, vuMemBase, vif->tag.addr);
if (vif->cl >= blockSize) vif->cl = 0;
pxAssume (vif.cl == 0);
pxAssume (vifRegs.cycle.wl > 0);
while (vifRegs->num) {
if (vif->cl < cycleSize) {
if (size < ft.gsize) break;
if (doMode) {
//DevCon.WriteLn("Non SSE; unpackNum = %d", upkNum);
func((u32*)dest, (u32*)data);
}
else {
//DevCon.WriteLn("SSE Unpack!");
fnbase[aMin(vif->cl, 3)](dest, data);
}
data += ft.gsize;
size -= ft.gsize;
vifRegs->num--;
incVUptrBy16(idx, dest, vuMemBase);
if (++vif->cl == blockSize) vif->cl = 0;
if (singleUnpack) return;
}
else if (isFill) {
//DevCon.WriteLn("isFill!");
func((u32*)dest, (u32*)data);
vifRegs->num--;
incVUptrBy16(idx, dest, vuMemBase);
if (++vif->cl == blockSize) vif->cl = 0;
do {
u8* dest = getVUptr(idx, vif.tag.addr);
if (doMode) {
//if (1) {
ft(dest, data);
}
else {
incVUptr(idx, dest, vuMemBase, 16 * skipSize);
vif->cl = 0;
//DevCon.WriteLn("SSE Unpack!");
uint cl3 = aMin(vif.cl,3);
fnbase[cl3](dest, data);
}
}
vif.tag.addr += 16;
--vifRegs.num;
++vif.cl;
if (isFill) {
//DevCon.WriteLn("isFill!");
if (vif.cl < vifRegs.cycle.cl) data += vSize;
else if (vif.cl == vifRegs.cycle.wl) vif.cl = 0;
}
else
{
data += vSize;
if (vif.cl >= vifRegs.cycle.wl) {
vif.tag.addr += skipSize;
vif.cl = 0;
}
}
} while (vifRegs.num);
}
__fi void _nVifUnpack(int idx, const u8 *data, u32 size, bool isFill) {
__fi void _nVifUnpack(int idx, const u8* data, uint mode, bool isFill) {
if (useOldUnpack) {
if (!idx) VIFunpack<0>((u32*)data, &vif0.tag, size>>2);
else VIFunpack<1>((u32*)data, &vif1.tag, size>>2);
return;
}
const bool doMode = !!vifRegs->mode;
UnpackLoopTable[idx][doMode][isFill]( data, size );
UnpackLoopTable[idx][!!mode][isFill]( data );
}

View File

@ -36,31 +36,6 @@ void mergeVectors(xRegisterSSE dest, xRegisterSSE src, xRegisterSSE temp, int xy
}
}
// Loads Row/Col Data from vifRegs instead of g_vifmask
// Useful for testing vifReg and g_vifmask inconsistency.
void loadRowCol(nVifStruct& v) {
xMOVAPS(xmm0, ptr32[&v.vifRegs->r0]);
xMOVAPS(xmm1, ptr32[&v.vifRegs->r1]);
xMOVAPS(xmm2, ptr32[&v.vifRegs->r2]);
xMOVAPS(xmm6, ptr32[&v.vifRegs->r3]);
xPSHUF.D(xmm0, xmm0, _v0);
xPSHUF.D(xmm1, xmm1, _v0);
xPSHUF.D(xmm2, xmm2, _v0);
xPSHUF.D(xmm6, xmm6, _v0);
mVUmergeRegs(xmm6, xmm0, 8);
mVUmergeRegs(xmm6, xmm1, 4);
mVUmergeRegs(xmm6, xmm2, 2);
xMOVAPS(xmm2, ptr32[&v.vifRegs->c0]);
xMOVAPS(xmm3, ptr32[&v.vifRegs->c1]);
xMOVAPS(xmm4, ptr32[&v.vifRegs->c2]);
xMOVAPS(xmm5, ptr32[&v.vifRegs->c3]);
xPSHUF.D(xmm2, xmm2, _v0);
xPSHUF.D(xmm3, xmm3, _v0);
xPSHUF.D(xmm4, xmm4, _v0);
xPSHUF.D(xmm5, xmm5, _v0);
}
// =====================================================================================================
// VifUnpackSSE_Base Section
// =====================================================================================================

View File

@ -25,7 +25,6 @@
using namespace x86Emitter;
extern void mergeVectors(xRegisterSSE dest, xRegisterSSE src, xRegisterSSE temp, int xyzw);
extern void loadRowCol(nVifStruct& v);
// --------------------------------------------------------------------------------------
// VifUnpackSSE_Base

View File

@ -14,6 +14,7 @@ set(CommonFlags
-DZEROGS_SSE2
-fno-regmove
-fno-strict-aliasing
-Wstrict-aliasing # Allow to track strict aliasing issue.
-Wno-format
-Wno-unused-parameter
-Wno-unused-value
@ -94,6 +95,7 @@ set(zzoglHeaders
zerogsmath.h
zpipe.h
ZZoglCRTC.h
ZZoglShaders.h
ZZGl.h
ZZLog.h)
@ -155,6 +157,9 @@ target_link_libraries(${Output} ${OPENGL_LIBRARIES})
# link target with X11
target_link_libraries(${Output} ${X11_LIBRARIES})
# link target with jpeg
target_link_libraries(${Output} ${JPEG_LIBRARIES})
# User flags options
if(NOT USER_CMAKE_LD_FLAGS STREQUAL "")
target_link_libraries(${Output} "${USER_CMAKE_LD_FLAGS}")

View File

@ -301,6 +301,7 @@ void GLWindow::SwapGLBuffers()
{
static u32 lastswaptime = 0;
SwapBuffers(hDC);
//glClear(GL_COLOR_BUFFER_BIT);
lastswaptime = timeGetTime();
}

View File

@ -227,7 +227,8 @@ void GLWindow::ToggleFullscreen()
XUnlockDisplay(glDisplay);
// Apply the change
XSync(glDisplay, False);
// Note: XSync is not enough. All pending events must be flushed.
XFlush(glDisplay);
// update info structure
GetWindowSize();
@ -298,6 +299,7 @@ bool GLWindow::DisplayWindow(int _width, int _height)
// Swaps the buffers of glWindow on glDisplay by calling glXSwapBuffers.
// The glClear call that follows the swap is currently commented out.
void GLWindow::SwapGLBuffers()
{
glXSwapBuffers(glDisplay, glWindow);
//glClear(GL_COLOR_BUFFER_BIT);
}
void GLWindow::SetTitle(char *strtitle)

View File

@ -117,6 +117,7 @@ enum PSM_value
// Check target bit mode. PSMCT32 and 32Z return 0, 24 and 24Z - 1
// 16, 16S, 16Z, 16SZ -- 2, PSMT8 and 8H - 3, PSMT4, 4HL, 4HH -- 4.
// This code returns the same value on Z-textures, so texel storage mode is (BITMODE and !ISZTEX).
inline int PSMT_BITMODE(int psm) {return (psm & 0x7);}
inline int PSMT_BITS_NUM(int psm)
@ -168,6 +169,11 @@ inline bool PSMT_IS16Z(int psm) {return ((psm & 0x32) == 0x32);}
// I'll have to look closer at it, because it'd seem like it'd return true for 24 bits.
inline bool PSMT_IS32BIT(int psm) {return !!(psm <= 1);}
// Reports whether psm is one of the color formats whose alpha value is
// expanded based on the TEXA register and AEM status: RGB24 (PSMCT24) or
// RGBA16 (PSMCT16 and PSMCT16S). Returns 1 for those formats, 0 otherwise.
inline int PSMT_ALPHAEXP(int psm)
{
	switch (psm)
	{
		case PSMCT24:
		case PSMCT16:
		case PSMCT16S:
			return 1;

		default:
			return 0;
	}
}
// This function updates the 6th and 5th bit of psm
// 00 or 11 -> 00 ; 01 -> 10 ; 10 -> 01
inline int Switch_Top_Bytes (int X) {
@ -177,6 +183,19 @@ inline int Switch_Top_Bytes (int X) {
return (X ^ 0x30);
}
// Number of pixels stored in one 32-bit word for the given storage format:
// PSMT8 packs 4 pixels per word, PSMT4 packs 8, every 16-bit format packs 2,
// and all remaining formats hold a single pixel per word.
inline int PIXELS_PER_WORD(int psm)
{
	switch (psm)
	{
		case PSMT8:
			return 4;

		case PSMT4:
			return 8;

		default:
			return PSMT_IS16BIT(psm) ? 2 : 1;
	}
}
}
// Some storage formats could share the same memory block (2 textures in 1 format). This includes the following combinations:
// PSMT24(24Z) with either 8H, 4HL, 4HH and PSMT4HL with PSMT4HH.
// We use slightly different versions of this function on comparison with GSDX, Storage format XOR 0x30 made Z-textures
@ -488,6 +507,10 @@ typedef struct
GIFRegDIMX dimx;
GSMemory mem;
GSClut clut_buffer;
// Returns the index into gsvertex that lies `inc` slots after primIndex (one
// slot by default), wrapping around at the number of elements in gsvertex.
// primIndex itself is not modified; callers assign the result back.
// NOTE(review): presumably callers only pass non-negative increments -- confirm,
// since the wrap-around relies on a plain modulo.
int primNext(int inc = 1)
{
return ((primIndex + inc) % ARRAY_SIZE(gsvertex));
}
void setRGBA(u32 r, u32 g, u32 b, u32 a)
{
@ -504,7 +527,7 @@ typedef struct
vertexregs.z = z;
vertexregs.f = f;
gsvertex[primIndex] = vertexregs;
primIndex = (primIndex + 1) % ARRAY_SIZE(gsvertex);
primIndex = primNext();
}
void add_vertex(u16 x, u16 y, u32 z)
@ -513,7 +536,7 @@ typedef struct
vertexregs.y = y;
vertexregs.z = z;
gsvertex[primIndex] = vertexregs;
primIndex = (primIndex + 1) % ARRAY_SIZE(gsvertex);
primIndex = primNext();
}
} GSinternal;
@ -589,14 +612,17 @@ inline float Clamp(float fx, float fmin, float fmax)
return fx > fmax ? fmax : fx;
}
// PSMT16, 16S have shorter color per pixel, also cluted textures with half storage.
inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0)
{
if (PSMT_IS16BIT(tex0.psm) || (PSMT_ISCLUT(tex0.psm) && tex0.cpsm > 1))
return true;
else
return false;
}
// Returns the format in which pixels are stored for the given tex0 setup:
// CLUT-based textures store their pixels in the cpsm format, every other
// texture uses psm directly.
inline int PIXEL_STORAGE_FORMAT(const tex0Info& tex)
{
	return PSMT_ISCLUT(tex.psm) ? tex.cpsm : tex.psm;
}
// A texture uses half (16-bit) storage whenever its pixel storage format is
// neither PSMCT32 nor PSMCT24. Z-texture formats carry 0x30 in their upper
// bits, so those bits are masked off with & (~0x30) before the comparison.
inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0)
{
	const int colorFormat = PIXEL_STORAGE_FORMAT(tex0) & (~0x30);
	return colorFormat > 1;
}
//--------------------------- Inlines for bitwise ops
//--------------------------- textures

View File

@ -36,7 +36,8 @@ using namespace std;
#include "zerogs.h"
#include "targets.h"
#include "ZeroGSShaders/zerogsshaders.h"
#include "ZZoglShaders.h"
#include "ZZoglFlushHack.h"
#include "ZZoglFlushHack.h"
#ifdef _MSC_VER
@ -64,12 +65,11 @@ bool SaveStateExists = true; // We could not know save slot status before first
const char* SaveStateFile = NULL; // Name of SaveFile for access check.
extern const char* s_aa[5];
extern const char* s_naa[3];
extern const char* pbilinear[];
// statistics
u32 g_nGenVars = 0, g_nTexVars = 0, g_nAlphaVars = 0, g_nResolve = 0;
#define VER 1
#define VER 2
const unsigned char zgsversion = PS2E_GS_VERSION;
unsigned char zgsrevision = 0; // revision and build gives plugin version
unsigned char zgsbuild = VER;
@ -85,7 +85,7 @@ char *libraryName = "ZZ Ogl PG ";
extern int g_nPixelShaderVer, g_nFrameRender, g_nFramesSkipped;
extern void ProcessMessages();
extern void ProcessEvents();
extern void WriteAA();
extern void WriteBilinear();
@ -304,7 +304,7 @@ s32 CALLBACK GSinit()
{
FUNCLOG
if (ZZLog::Open() == false) return -1;
ZZLog::Open();
ZZLog::WriteLn("Calling GSinit.");
WriteTempRegs();
@ -477,8 +477,7 @@ static __forceinline void SetGSTitle()
SaveStateExists = true;
sprintf(strtitle, "ZZ Open GL 0.%d.%d | %.1f fps | %s%s%s savestate %d%s | shaders %s | (%.1f)", zgsbuild, zgsminor, fFPS,
g_pInterlace[conf.interlace], g_pBilinear[conf.bilinear],
(conf.aa >= conf.negaa) ? (conf.aa ? s_aa[conf.aa - conf.negaa] : "") : (conf.negaa ? s_naa[conf.negaa - conf.aa] : ""),
g_pInterlace[conf.interlace], g_pBilinear[conf.bilinear], (conf.aa ? s_aa[conf.aa] : ""),
CurrentSavestate, (SaveStateExists ? "" : "*"),
g_pShaders[g_nPixelShaderVer], (ppf&0xfffff) / (float)UPDATE_FRAMES);
@ -515,7 +514,7 @@ void CALLBACK GSvsync(int interlace)
// !interlace? Hmmm... Fixme.
ZeroGS::RenderCRTC(!interlace);
ProcessMessages();
ProcessEvents();
if (--nToNextUpdate <= 0)
{

View File

@ -390,8 +390,6 @@ void DisplayDialog()
if (gtk_combo_box_get_active(GTK_COMBO_BOX(aa_box)) != -1)
conf.aa = gtk_combo_box_get_active(GTK_COMBO_BOX(aa_box));
conf.negaa = 0;
conf.log = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(log_check));
conf.bilinear = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(bilinear_check));
fake_options.widescreen = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(widescreen_check));

View File

@ -16,6 +16,7 @@
<Compiler>
<Add option="-Wall" />
<Add option="-g" />
<Add option="-DZEROGS_DEVBUILD" />
<Add option="-D_DEBUG" />
</Compiler>
<Linker>
@ -87,7 +88,6 @@
<Add library="jpeg" />
<Add library="GLEW" />
<Add library="GLU" />
<Add library="Xxf86vm" />
<Add library="z" />
<Add library="dl" />
<Add library="stdc++" />
@ -154,9 +154,9 @@
<Unit filename="../../ZZoglFlushHack.h" />
<Unit filename="../../ZZoglSave.cpp" />
<Unit filename="../../ZZoglShaders.cpp" />
<Unit filename="../../ZZoglShaders.h" />
<Unit filename="../../ZZoglShoots.cpp" />
<Unit filename="../../ZZoglVB.cpp" />
<Unit filename="../../ZeroGSShaders/zerogsshaders.h" />
<Unit filename="../../common.h" />
<Unit filename="../../glprocs.c">
<Option compilerVar="CC" />

View File

@ -140,7 +140,7 @@ void __gifCall KickVertex(bool adc)
{
/* tri fans need special processing */
if (gs.nTriFanVert == gs.primIndex)
gs.primIndex = (gs.primIndex + 1) % ArraySize(gs.gsvertex);
gs.primIndex = gs.primNext();
}
}
}

View File

@ -111,6 +111,7 @@ void __gifCall GIFPackedRegHandlerRGBA(const u32* data)
void __gifCall GIFPackedRegHandlerSTQ(const u32* data)
{
FUNCLOG
// Despite this code generating a warning, it's correct. float -> float reduction: S and T lose the low 8 bits of their mantissas.
*(u32*)&gs.vertexregs.s = data[0] & 0xffffff00;
*(u32*)&gs.vertexregs.t = data[1] & 0xffffff00;
*(u32*)&gs.q = data[2];
@ -147,7 +148,7 @@ void __forceinline KICK_VERTEX3()
{
/* tri fans need special processing */
if (gs.nTriFanVert == gs.primIndex)
gs.primIndex = (gs.primIndex + 1) % ArraySize(gs.gsvertex);
gs.primIndex = gs.primNext();
}
}
}
@ -160,7 +161,7 @@ void __gifCall GIFPackedRegHandlerXYZF2(const u32* data)
gs.vertexregs.z = (data[2] >> 4) & 0xffffff;
gs.vertexregs.f = (data[3] >> 4) & 0xff;
gs.gsvertex[gs.primIndex] = gs.vertexregs;
gs.primIndex = (gs.primIndex + 1) % ArraySize(gs.gsvertex);
gs.primIndex = gs.primNext();
if (data[3] & 0x8000)
{
@ -179,7 +180,7 @@ void __gifCall GIFPackedRegHandlerXYZ2(const u32* data)
gs.vertexregs.y = (data[1] >> 0) & 0xffff;
gs.vertexregs.z = data[2];
gs.gsvertex[gs.primIndex] = gs.vertexregs;
gs.primIndex = (gs.primIndex + 1) % ArraySize(gs.gsvertex);
gs.primIndex = gs.primNext();
if (data[3] & 0x8000)
{
@ -428,7 +429,7 @@ void __gifCall GIFRegHandlerXYZF2(const u32* data)
gs.vertexregs.z = data[1] & 0xffffff;
gs.vertexregs.f = data[1] >> 24;
gs.gsvertex[gs.primIndex] = gs.vertexregs;
gs.primIndex = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex);
gs.primIndex = gs.primNext();
KICK_VERTEX2();
}
@ -440,7 +441,7 @@ void __gifCall GIFRegHandlerXYZ2(const u32* data)
gs.vertexregs.y = (data[0] >> (16)) & 0xffff;
gs.vertexregs.z = data[1];
gs.gsvertex[gs.primIndex] = gs.vertexregs;
gs.primIndex = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex);
gs.primIndex = gs.primNext();
KICK_VERTEX2();
}
@ -496,7 +497,7 @@ void __gifCall GIFRegHandlerXYZF3(const u32* data)
gs.vertexregs.z = data[1] & 0xffffff;
gs.vertexregs.f = data[1] >> 24;
gs.gsvertex[gs.primIndex] = gs.vertexregs;
gs.primIndex = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex);
gs.primIndex = gs.primNext();
KICK_VERTEX3();
}
@ -508,7 +509,7 @@ void __gifCall GIFRegHandlerXYZ3(const u32* data)
gs.vertexregs.y = (data[0] >> (16)) & 0xffff;
gs.vertexregs.z = data[1];
gs.gsvertex[gs.primIndex] = gs.vertexregs;
gs.primIndex = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex);
gs.primIndex = gs.primNext();
KICK_VERTEX3();
}

View File

@ -201,7 +201,6 @@ typedef struct
u8 mrtdepth; // write color in render target
u8 interlace; // interlacing mode 0, 1, 3-off
u8 aa; // antialiasing 0 - off, 1 - 2x, 2 - 4x, 3 - 8x, 4 - 16x
u8 negaa; // negative aliasing
u8 bilinear; // set to enable bilinear support. 0 - off, 1 -- on, 2 -- force (use for textures that usually need it)
ZZOptions zz_options;
gameHacks hacks; // game options -- different hacks.
@ -352,6 +351,6 @@ extern "C" void * memcpy_amd(void *dest, const void *src, size_t n);
extern "C" u8 memcmp_mmx(const void *dest, const void *src, int n);
#endif
extern bool g_bDisplayFPS; // should we display FPS on screen?
#endif // UTIL_H_INCLUDED

View File

@ -5,7 +5,7 @@
#include "resrc1.h"
#include "GS.h"
#include "zerogsshaders.h"
#include "ZZoglShaders.h"
#include "Win32.h"
#include <map>
@ -169,7 +169,6 @@ void OnOK(HWND hW)
conf.aa = 0;
}
conf.negaa = 0;
conf.zz_options._u32 = 0;
conf.zz_options.capture_avi = IsDlgButtonChecked(hW, IDC_CONFIG_CAPTUREAVI) ? 1 : 0;

View File

@ -4,6 +4,9 @@
//
#define IDC_CONF_DEFAULT 3
#define IDR_DATA1 112
#define IDD_ADV_OPTIONS 113
#define IDD_DIALOG1 114
#define IDD_CONFIG2 114
#define IDC_ABOUTTEXT 1015
#define IDC_CONFIG_AA 1016
#define IDC_CONFIG_INTERLACE 1017
@ -64,20 +67,20 @@
#define IDC_CONFOPT_00040000 1063
#define IDC_CONFOPT_02000000 1064
#define IDC_CONFOPT_04000000 1065
#define IDC_CONFOPT_10000000 1068
#define IDC_CONFOPT_20000000 1069
#define IDC_CONFOPT_00000100 1066
#define IDC_CONFIG_WIDESCREEN 1067
#define IDC_CONFOPT_10000000 1068
#define IDC_CONFOPT_20000000 1069
#define IDC_CONFIG_AA8 2003
#define IDC_CONFIG_AA16 2004
// Next default values for new objects
//
//
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE 113
#define _APS_NEXT_RESOURCE_VALUE 116
#define _APS_NEXT_COMMAND_VALUE 40001
#define _APS_NEXT_CONTROL_VALUE 1049
#define _APS_NEXT_CONTROL_VALUE 1051
#define _APS_NEXT_SYMED_VALUE 101
#endif
#endif

View File

@ -40,88 +40,199 @@ IDR_SHADERS RCDATA "ps2hw.dat"
//
// Dialog
//
IDD_CONFIG DIALOGEX 0, 0, 530, 290
STYLE DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU
STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU
CAPTION "ZZ OpenGL PG Configuration"
FONT 8, "Tahoma"
{
CONTROL "Logging (For Debugging)", 1000, BUTTON, BS_AUTOCHECKBOX | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 5, 6, 102, 10
CONTROL "Interlace Enable (toggle with F5). There are 2 modes + interlace off", IDC_CONFIG_INTERLACE, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 5, 64, 137, 18
CONTROL "Bilinear Filtering (Shift+F5). Best quality is on, turn off for speed.", IDC_CONFIG_BILINEAR, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 5, 81, 137, 18
CONTROL "None", IDC_CONFIG_AANONE, BUTTON, BS_AUTORADIOBUTTON | WS_CHILD | WS_VISIBLE | WS_GROUP, 21, 38, 34, 11
CONTROL "2X", IDC_CONFIG_AA2, BUTTON, BS_AUTORADIOBUTTON | WS_CHILD | WS_VISIBLE, 21, 48, 26, 11
CONTROL "4X", IDC_CONFIG_AA4, BUTTON, BS_AUTORADIOBUTTON | WS_CHILD | WS_VISIBLE, 63, 38, 28, 11
CONTROL "8X", IDC_CONFIG_AA8, BUTTON, BS_AUTORADIOBUTTON | WS_CHILD | WS_VISIBLE, 63, 48, 26, 11
CONTROL "16X", IDC_CONFIG_AA16, BUTTON, BS_AUTORADIOBUTTON | WS_CHILD | WS_VISIBLE, 100, 42, 28, 11
CONTROL "Wireframe rendering (F7)", IDC_CONFIG_WIREFRAME, BUTTON, BS_AUTOCHECKBOX | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 5, 183, 96, 10
CONTROL "Capture Avi (zerogs.avi) (F12)", IDC_CONFIG_CAPTUREAVI, BUTTON, BS_AUTOCHECKBOX | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 5, 194, 109, 10
CONTROL "Save Snapshots as BMP(default is JPG)", IDC_CONFIG_BMPSS, BUTTON, BS_AUTOCHECKBOX | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 5, 205, 141, 10
CONTROL "Fullscreen (Alt+Enter),to get out press Alt+Enter again", IDC_CONFIG_FULLSCREEN, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 5, 150, 135, 18
CONTROL "Wide Screen", IDC_CONFIG_WIDESCREEN, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 5, 167, 109, 10
CONTROL "640 x 480", IDC_CONF_WIN640, BUTTON, BS_AUTORADIOBUTTON | WS_CHILD | WS_VISIBLE | WS_GROUP, 17, 123, 59, 8
CONTROL "800 x 600", IDC_CONF_WIN800, BUTTON, BS_AUTORADIOBUTTON | WS_CHILD | WS_VISIBLE, 17, 135, 59, 8
CONTROL "1024 x 768", IDC_CONF_WIN1024, BUTTON, BS_AUTORADIOBUTTON | WS_CHILD | WS_VISIBLE, 82, 123, 59, 8
CONTROL "1280 x 960", IDC_CONF_WIN1280, BUTTON, BS_AUTORADIOBUTTON | WS_CHILD | WS_VISIBLE, 82, 135, 53, 8
CONTROL "Anti-aliasing for sharper graphics (F6)", IDC_STATIC, BUTTON, BS_GROUPBOX | WS_CHILD | WS_VISIBLE, 5, 21, 137, 41
CONTROL "Default Window Size (no speed impact)", IDC_STATIC, BUTTON, BS_GROUPBOX | WS_CHILD | WS_VISIBLE, 6, 111, 137, 39
CONTROL "Show Frames Per Second (Shift+F7)", IDC_STATIC, STATIC, SS_LEFT | WS_CHILD | WS_VISIBLE | WS_GROUP, 15, 219, 118, 20
CONTROL "shortcuts: F6 - next, Shift+F6 - prev", IDC_STATIC, STATIC, SS_LEFT | WS_CHILD | WS_VISIBLE | WS_GROUP, 13, 28, 123, 11
CONTROL "Advanced Options", IDC_STATIC, BUTTON, BS_GROUPBOX | WS_CHILD | WS_VISIBLE, 150, 0, 375, 272
CONTROL "Each option is presented with a unique ID in hex.", IDC_STATIC, STATIC, SS_LEFT | WS_CHILD | WS_VISIBLE | WS_GROUP, 155, 10, 365, 8
CONTROL "Note, setting options here means that they will be ADDED to whatever options are set automatically.", IDC_STATIC, STATIC, SS_LEFT | WS_CHILD | WS_VISIBLE | WS_GROUP, 155, 20, 365, 8
CONTROL "Enable Multiple RTS - 00100000", IDC_CONFOPT_00100000, BUTTON, BS_AUTOCHECKBOX | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 30, 365, 8
CONTROL "Disable alpha testing - 00080000", IDC_CONFOPT_00080000, BUTTON, BS_AUTOCHECKBOX | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 40, 365, 8
CONTROL "Disable stencil buffer - 00002000, Usually safe to do for simple scenes. Harvest Moon", IDC_CONFOPT_00002000, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 50, 365, 8
CONTROL "No color clamping - 00000040, Speeds games up but might be too bright or too dim", IDC_CONFOPT_00000040, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 60, 365, 8
CONTROL "Disable depth updates - 00000200", IDC_CONFOPT_00000200, BUTTON, BS_AUTOCHECKBOX | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 70, 365, 8
CONTROL "No target CLUT - 00001000, Use on RE4, or foggy scenes.", IDC_CONFOPT_00001000, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 80, 365, 8
CONTROL "Alpha Fail hack - 00000100, Remove vertical stripes, coloring artefacts. (Sonic Unleashed, Shadow the Hedgehog, Ghost in the Shell)", IDC_CONFOPT_00000100, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 90, 365, 16
CONTROL "Exact color testing - 00000020, Fixes overbright or shadow/black artifacts (crash n burn)", IDC_CONFOPT_00000020, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 110, 365, 8
CONTROL "Tex Target checking - 00000001, Lego racers", IDC_CONFOPT_00000001, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 120, 365, 8
CONTROL "Interlace 2X - 00000004, Fixes 2x bigger screen (Gradius 3)", IDC_CONFOPT_00000004, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 130, 365, 8
CONTROL "Resolve Hack #2 - 00000800, Shadow Hearts, Urbz, (Destroys FFX)", IDC_CONFOPT_00000800, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 140, 365, 8
CONTROL "Fast Update - 00040000, Speeds some games - (Okami, Sonic Unleashed)", 10IDC_CONFOPT_00040000, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 150, 365, 8
CONTROL "Partial Depth - 04000000, Tries to save the depth target as much as possible (mgs3)", IDC_CONFOPT_04000000, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 160, 365, 8
CONTROL "No target resolves - 00000010, Stops target resolving(try this first for very slow games). (Dark Cloud 1)", IDC_CONFOPT_00000010, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 170, 365, 8
CONTROL "No logarithmic-Z - 20000000, Decreases number of Z artefacts", IDC_CONFOPT_20000000, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 180, 365, 8
CONTROL "No depth resolve - 00008000, Might give z buffer artifacts", IDC_CONFOPT_00008000, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 190, 365, 8
CONTROL "Auto Reset Targs - 00000002, Use when game is slow and toggling AA fixes it (SH, samurai warriors)", IDC_CONFOPT_00000002, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 200, 365, 8
CONTROL "Full 16 bit resolution - 00010000, Use when half the screen is missing, etc", IDC_CONFOPT_00010000, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 210, 365, 8
CONTROL "Resolve Hack #1 - 00000400, Speeds some games (Kingdom Hearts)", IDC_CONFOPT_00000400, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 220, 365, 8
CONTROL "Partial Targets - 02000000, Reduces artifacts and speeds up some games (mgs3)", IDC_CONFOPT_02000000, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 230, 365, 8
CONTROL "Resolve Hack #3 - 00020000, Neopets", IDC_CONFOPT_00020000, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 240, 365, 8
CONTROL "Specular Highlights - 01000000, Makes xenosaga and Okage graphics faster by removing highlights.", IDC_CONFOPT_01000000, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 250, 365, 8
CONTROL "Gust fix - 10000000, Makes gust games cleaner and faster.", IDC_CONFOPT_10000000, BUTTON, BS_AUTOCHECKBOX | BS_MULTILINE | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 155, 260, 365, 8
CONTROL "OK", IDOK, BUTTON, BS_DEFPUSHBUTTON | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 5, 275, 50, 12
CONTROL "Cancel", IDCANCEL, BUTTON, BS_PUSHBUTTON | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 92, 275, 50, 12
CONTROL "Use Defaults (recommended)", IDC_CONF_DEFAULT, BUTTON, BS_PUSHBUTTON | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 171, 275, 151, 12
CONTROL "", IDC_CONFOPT_IDS, EDIT, ES_LEFT | ES_AUTOHSCROLL | ES_READONLY | WS_CHILD | WS_VISIBLE | WS_BORDER | WS_TABSTOP, 451, 275, 48, 12
CONTROL "Compute OR of IDS", IDC_CONFOPT_COMPUTEOR, BUTTON, BS_PUSHBUTTON | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 373, 275, 73, 12
}
FONT 8, "Tahoma", 0, 0, 0x1
BEGIN
CONTROL "Logging (For Debugging)",1000,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,5,6,102,10
CONTROL "Interlace Enable (toggle with F5). There are 2 modes + interlace off",IDC_CONFIG_INTERLACE,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,5,64,137,18
CONTROL "Bilinear Filtering (Shift+F5). Best quality is on, turn off for speed.",IDC_CONFIG_BILINEAR,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,5,81,137,18
CONTROL "None",IDC_CONFIG_AANONE,"Button",BS_AUTORADIOBUTTON | WS_GROUP,21,38,34,11
CONTROL "2X",IDC_CONFIG_AA2,"Button",BS_AUTORADIOBUTTON,21,48,26,11
CONTROL "4X",IDC_CONFIG_AA4,"Button",BS_AUTORADIOBUTTON,63,38,28,11
CONTROL "8X",IDC_CONFIG_AA8,"Button",BS_AUTORADIOBUTTON,63,48,26,11
CONTROL "16X",IDC_CONFIG_AA16,"Button",BS_AUTORADIOBUTTON,100,42,28,11
CONTROL "Wireframe rendering (F7)",IDC_CONFIG_WIREFRAME,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,5,183,96,10
CONTROL "Capture Avi (zerogs.avi) (F12)",IDC_CONFIG_CAPTUREAVI,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,5,194,109,10
CONTROL "Save Snapshots as BMP(default is JPG)",IDC_CONFIG_BMPSS,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,5,205,141,10
CONTROL "Fullscreen (Alt+Enter),to get out press Alt+Enter again",IDC_CONFIG_FULLSCREEN,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,5,150,135,18
CONTROL "Wide Screen",IDC_CONFIG_WIDESCREEN,"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,5,167,109,10
CONTROL "640 x 480",IDC_CONF_WIN640,"Button",BS_AUTORADIOBUTTON | WS_GROUP,17,123,59,8
CONTROL "800 x 600",IDC_CONF_WIN800,"Button",BS_AUTORADIOBUTTON,17,135,59,8
CONTROL "1024 x 768",IDC_CONF_WIN1024,"Button",BS_AUTORADIOBUTTON,82,123,59,8
CONTROL "1280 x 960",IDC_CONF_WIN1280,"Button",BS_AUTORADIOBUTTON,82,135,53,8
GROUPBOX "Anti-aliasing for sharper graphics (F6)",IDC_STATIC,5,21,137,41
GROUPBOX "Default Window Size (no speed impact)",IDC_STATIC,6,111,137,39
LTEXT "Show Frames Per Second (Shift+F7)",IDC_STATIC,15,219,118,20
LTEXT "shortcuts: F6 - next, Shift+F6 - prev",IDC_STATIC,13,28,123,11
GROUPBOX "Advanced Options",IDC_STATIC,150,0,375,272
LTEXT "Each option is presented with a unique ID in hex.",IDC_STATIC,155,10,365,8
LTEXT "Note, setting options here means that they will be ADDED to whatever options are set automatically.",IDC_STATIC,155,20,365,8
CONTROL "Enable Multiple RTS - 00100000",IDC_CONFOPT_00100000,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,155,30,365,8
CONTROL "Disable alpha testing - 00080000",IDC_CONFOPT_00080000,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,155,40,365,8
CONTROL "Disable stencil buffer - 00002000, Usually safe to do for simple scenes. Harvest Moon",IDC_CONFOPT_00002000,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,50,365,8
CONTROL "No color clamping - 00000040, Speeds games up but might be too bright or too dim",IDC_CONFOPT_00000040,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,60,365,8
CONTROL "Disable depth updates - 00000200",IDC_CONFOPT_00000200,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,155,70,365,8
CONTROL "No target CLUT - 00001000, Use on RE4, or foggy scenes.",IDC_CONFOPT_00001000,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,80,365,8
CONTROL "Alpha Fail hack - 00000100, Remove vertical stripes, coloring artefacts. (Sonic Unleashed, Shadow the Hedgehog, Ghost in the Shell)",IDC_CONFOPT_00000100,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,90,365,16
CONTROL "Exact color testing - 00000020, Fixes overbright or shadow/black artifacts (crash n burn)",IDC_CONFOPT_00000020,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,110,365,8
CONTROL "Tex Target checking - 00000001, Lego racers",IDC_CONFOPT_00000001,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,120,365,8
CONTROL "Interlace 2X - 00000004, Fixes 2x bigger screen (Gradius 3)",IDC_CONFOPT_00000004,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,130,365,8
CONTROL "Resolve Hack #2 - 00000800, Shadow Hearts, Urbz, (Destroys FFX)",IDC_CONFOPT_00000800,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,140,365,8
CONTROL "Fast Update - 00040000, Speeds some games - (Okami, Sonic Unleashed)",35527,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,150,365,8
CONTROL "Partial Depth - 04000000, Tries to save the depth target as much as possible (mgs3)",IDC_CONFOPT_04000000,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,160,365,8
CONTROL "No target resolves - 00000010, Stops target resolving(try this first for very slow games). (Dark Cloud 1)",IDC_CONFOPT_00000010,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,170,365,8
CONTROL "No logarithmic-Z - 20000000, Decreases number of Z artefacts",IDC_CONFOPT_20000000,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,180,365,8
CONTROL "No depth resolve - 00008000, Might give z buffer artifacts",IDC_CONFOPT_00008000,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,190,365,8
CONTROL "Auto Reset Targs - 00000002, Use when game is slow and toggling AA fixes it (SH, samurai warriors)",IDC_CONFOPT_00000002,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,200,365,8
CONTROL "Full 16 bit resolution - 00010000, Use when half the screen is missing, etc",IDC_CONFOPT_00010000,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,210,365,8
CONTROL "Resolve Hack #1 - 00000400, Speeds some games (Kingdom Hearts)",IDC_CONFOPT_00000400,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,220,365,8
CONTROL "Partial Targets - 02000000, Reduces artifacts and speeds up some games (mgs3)",IDC_CONFOPT_02000000,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,230,365,8
CONTROL "Resolve Hack #3 - 00020000, Neopets",IDC_CONFOPT_00020000,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,240,365,8
CONTROL "Specular Highlights - 01000000, Makes xenosaga and Okage graphics faster by removing highlights.",IDC_CONFOPT_01000000,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,250,365,8
CONTROL "Gust fix - 10000000, Makes gust games cleaner and faster.",IDC_CONFOPT_10000000,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,155,260,365,8
DEFPUSHBUTTON "OK",IDOK,5,275,50,12
PUSHBUTTON "Cancel",IDCANCEL,92,275,50,12
PUSHBUTTON "Use Defaults (recommended)",IDC_CONF_DEFAULT,171,275,151,12
EDITTEXT IDC_CONFOPT_IDS,451,275,48,12,ES_AUTOHSCROLL | ES_READONLY
PUSHBUTTON "Compute OR of IDS",IDC_CONFOPT_COMPUTEOR,373,275,73,12
END
IDD_ABOUT DIALOGEX 0, 0, 182, 220
STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_POPUP | WS_CAPTION | WS_SYSMENU
CAPTION "GSabout"
FONT 8, "MS Sans Serif"
{
DEFPUSHBUTTON "OK",IDOK,65,199,50,14
LTEXT "ZZogl\n\nauthor: Zeydlitz(@gmail.com)\n\n\nthanks to Gabest for SSE optimizations",IDC_STATIC,7,7,160,47
LTEXT "Static",IDC_ABOUTTEXT,7,65,152,124
}
FONT 8, "MS Sans Serif", 0, 0, 0x1
BEGIN
DEFPUSHBUTTON "OK",IDOK,65,199,50,14
LTEXT "ZZogl\n\nauthor: Zeydlitz(@gmail.com)\n\n\nthanks to Gabest for SSE optimizations",IDC_STATIC,7,7,160,47
LTEXT "Static",IDC_ABOUTTEXT,7,65,152,124
END
IDD_LOGGING DIALOG 0, 0, 152, 55
STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_POPUP | WS_CAPTION | WS_SYSMENU
CAPTION "Dialog"
FONT 8, "MS Sans Serif"
{
BEGIN
DEFPUSHBUTTON "OK",IDOK,40,35,50,14
PUSHBUTTON "Cancel",IDCANCEL,95,35,50,14
CONTROL "Log",IDC_LOG,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,60,15,28,10
}
END
// Advanced Options dialog: one checkbox per game-hack flag. Each caption carries
// the flag's hexadecimal ID, and each control is addressed through the matching
// IDC_CONFOPT_<hex> symbol from the resource header.
IDD_ADV_OPTIONS DIALOGEX 0, 0, 391, 308
STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU
CAPTION "Advanced Options"
FONT 8, "MS Shell Dlg", 400, 0, 0x1
BEGIN
    DEFPUSHBUTTON   "OK",IDOK,279,287,50,14
    PUSHBUTTON      "Cancel",IDCANCEL,333,287,50,14
    GROUPBOX        "Advanced Options",IDC_STATIC,7,6,376,276
    LTEXT           "Each option is presented with a unique ID in hex.",IDC_STATIC,14,15,365,8
    LTEXT           "Note, setting options here means that they will be ADDED to whatever options are set automatically.",IDC_STATIC,14,25,365,8
    CONTROL         "Enable Multiple RTS - 00100000",IDC_CONFOPT_00100000,
                    "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,36,365,8
    CONTROL         "Disable alpha testing - 00080000",IDC_CONFOPT_00080000,
                    "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,46,365,8
    CONTROL         "Disable stencil buffer - 00002000, Usually safe to do for simple scenes. Harvest Moon",IDC_CONFOPT_00002000,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,56,365,8
    CONTROL         "No color clamping - 00000040, Speeds games up but might be too bright or too dim",IDC_CONFOPT_00000040,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,66,365,8
    CONTROL         "Disable depth updates - 00000200",IDC_CONFOPT_00000200,
                    "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,76,365,8
    CONTROL         "No target CLUT - 00001000, Use on RE4, or foggy scenes.",IDC_CONFOPT_00001000,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,86,365,8
    // This entry is 16 units tall (the others are 8) so its long caption can wrap.
    CONTROL         "Alpha Fail hack - 00000100, Remove vertical stripes, coloring artefacts. (Sonic Unleashed, Shadow the Hedgehog, Ghost in the Shell)",IDC_CONFOPT_00000100,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,96,365,16
    CONTROL         "Exact color testing - 00000020, Fixes overbright or shadow/black artifacts (crash n burn)",IDC_CONFOPT_00000020,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,116,365,8
    CONTROL         "Tex Target checking - 00000001, Lego racers",IDC_CONFOPT_00000001,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,126,365,8
    CONTROL         "Interlace 2X - 00000004, Fixes 2x bigger screen (Gradius 3)",IDC_CONFOPT_00000004,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,136,365,8
    CONTROL         "Resolve Hack #2 - 00000800, Shadow Hearts, Urbz, (Destroys FFX)",IDC_CONFOPT_00000800,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,146,365,8
    // Use the symbolic ID like every other option. A raw numeric ID here would
    // make the checkbox unreachable through IDC_CONFOPT_00040000.
    CONTROL         "Fast Update - 00040000, Speeds some games - (Okami, Sonic Unleashed)",IDC_CONFOPT_00040000,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,156,365,8
    CONTROL         "Partial Depth - 04000000, Tries to save the depth target as much as possible (mgs3)",IDC_CONFOPT_04000000,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,166,365,8
    CONTROL         "No target resolves - 00000010, Stops target resolving(try this first for very slow games). (Dark Cloud 1)",IDC_CONFOPT_00000010,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,176,365,8
    CONTROL         "No logarithmic-Z - 20000000, Decreases number of Z artefacts",IDC_CONFOPT_20000000,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,186,365,8
    CONTROL         "No depth resolve - 00008000, Might give z buffer artifacts",IDC_CONFOPT_00008000,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,196,365,8
    CONTROL         "Auto Reset Targs - 00000002, Use when game is slow and toggling AA fixes it (SH, samurai warriors)",IDC_CONFOPT_00000002,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,206,365,8
    CONTROL         "Full 16 bit resolution - 00010000, Use when half the screen is missing, etc",IDC_CONFOPT_00010000,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,216,365,8
    CONTROL         "Resolve Hack #1 - 00000400, Speeds some games (Kingdom Hearts)",IDC_CONFOPT_00000400,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,226,365,8
    CONTROL         "Partial Targets - 02000000, Reduces artifacts and speeds up some games (mgs3)",IDC_CONFOPT_02000000,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,236,365,8
    CONTROL         "Resolve Hack #3 - 00020000, Neopets",IDC_CONFOPT_00020000,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,246,365,8
    CONTROL         "Specular Highlights - 01000000, Makes xenosaga and Okage graphics faster by removing highlights.",IDC_CONFOPT_01000000,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,256,365,8
    CONTROL         "Gust fix - 10000000, Makes gust games cleaner and faster.",IDC_CONFOPT_10000000,
                    "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,266,365,8
END
// Compact options dialog: basic toggles, default window size radio group, an
// anti-aliasing drop-down (IDC_COMBO1) and an "Advanced..." button (IDC_BUTTON1).
// NOTE(review): the outer group box still carries the caption "Static", which
// looks like an editor placeholder - confirm the intended title.
// NOTE(review): IDC_COMBO1 has CBS_SORT, which orders list entries alphabetically;
// if the selection index is meant to map to an AA level, the displayed order
// will not match the order the strings are added in - confirm.
IDD_CONFIG2 DIALOGEX 0, 0, 171, 217
STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU
CAPTION "ZZOgl Options"
FONT 8, "MS Shell Dlg", 400, 0, 0x1
BEGIN
DEFPUSHBUTTON "OK",IDOK,55,192,50,14
PUSHBUTTON "Cancel",IDCANCEL,108,192,50,14
GROUPBOX "Static",IDC_STATIC,7,7,152,183
// The logging checkbox uses the literal control ID 1000 rather than a symbol.
CONTROL "Logging (For Debugging)",1000,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,18,102,10
CONTROL "Interlace Enable (toggle with F5). There are 2 modes + interlace off",IDC_CONFIG_INTERLACE,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,45,137,18
CONTROL "Bilinear Filtering (Shift+F5). Best quality is on, turn off for speed.",IDC_CONFIG_BILINEAR,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,67,137,18
CONTROL "Capture Avi (zerogs.avi) (F12)",IDC_CONFIG_CAPTUREAVI,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,103,109,10
CONTROL "Save Snapshots as BMP(default is JPG)",IDC_CONFIG_BMPSS,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,116,141,10
CONTROL "Wide Screen",IDC_CONFIG_WIDESCREEN,"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,90,109,10
// Window-size radio buttons: WS_GROUP on the first one starts the group.
CONTROL "640 x 480",IDC_CONF_WIN640,"Button",BS_AUTORADIOBUTTON | WS_GROUP,20,140,59,8
CONTROL "800 x 600",IDC_CONF_WIN800,"Button",BS_AUTORADIOBUTTON,21,152,59,8
CONTROL "1024 x 768",IDC_CONF_WIN1024,"Button",BS_AUTORADIOBUTTON,86,140,59,8
CONTROL "1280 x 960",IDC_CONF_WIN1280,"Button",BS_AUTORADIOBUTTON,86,151,53,8
GROUPBOX "Default Window Size (no speed impact)",IDC_STATIC,14,129,137,39
COMBOBOX IDC_COMBO1,59,31,48,30,CBS_DROPDOWNLIST | CBS_SORT | WS_VSCROLL | WS_TABSTOP
LTEXT "Anti-aliasing",IDC_STATIC,15,33,43,13
PUSHBUTTON "Advanced...",IDC_BUTTON1,17,170,134,14
END
/////////////////////////////////////////////////////////////////////////////
@ -137,7 +248,6 @@ BEGIN
LEFTMARGIN, 7
RIGHTMARGIN, 420
TOPMARGIN, 7
BOTTOMMARGIN, 394
END
IDD_ABOUT, DIALOG
@ -155,6 +265,22 @@ BEGIN
TOPMARGIN, 7
BOTTOMMARGIN, 48
END
IDD_ADV_OPTIONS, DIALOG
BEGIN
LEFTMARGIN, 7
RIGHTMARGIN, 389
TOPMARGIN, 6
BOTTOMMARGIN, 301
END
IDD_CONFIG2, DIALOG
BEGIN
LEFTMARGIN, 7
RIGHTMARGIN, 164
TOPMARGIN, 7
BOTTOMMARGIN, 210
END
END
#endif // APSTUDIO_INVOKED
@ -185,6 +311,27 @@ END
#endif // APSTUDIO_INVOKED
/////////////////////////////////////////////////////////////////////////////
//
// Dialog Info
//
// Initial string list for the IDC_COMBO1 combo box of IDD_CONFIG2.
// Each entry is: control ID, message, byte length of the data, 0, then the data
// as 16-bit words. Read as little-endian ASCII, the five NUL-terminated entries
// decode to "None", "2X", "4X", "8X" and "16X".
// NOTE(review): 0x403 is presumably the legacy add-string message value that
// resource editors emit for combo boxes in DLGINIT blocks - confirm.
// The single 0 before END terminates the list.
IDD_CONFIG2 DLGINIT
BEGIN
IDC_COMBO1, 0x403, 5, 0
0x6f4e, 0x656e, "\000"
IDC_COMBO1, 0x403, 3, 0
0x5832, "\000"
IDC_COMBO1, 0x403, 3, 0
0x5834, "\000"
IDC_COMBO1, 0x403, 3, 0
0x5838, "\000"
IDC_COMBO1, 0x403, 4, 0
0x3631, 0x0058,
0
END
#endif // English (U.S.) resources
/////////////////////////////////////////////////////////////////////////////

View File

@ -376,6 +376,10 @@
RelativePath="..\ZZoglFlush.cpp"
>
</File>
<File
RelativePath="..\ZZoglFlushHack.cpp"
>
</File>
<File
RelativePath="..\ZZoglSave.cpp"
>
@ -489,6 +493,14 @@
RelativePath="..\ZZLog.h"
>
</File>
<File
RelativePath="..\ZZoglFlushHack.h"
>
</File>
<File
RelativePath="..\ZZoglShaders.h"
>
</File>
</Filter>
<Filter
Name="Resource Files"
@ -515,6 +527,10 @@
>
</File>
</Filter>
<File
RelativePath=".\ps2hw.dat"
>
</File>
<File
RelativePath="..\ps2hw.dat"
>

View File

@ -46,12 +46,6 @@ inline void* wglGetProcAddress(const char* x)
#endif
#include <Cg/cg.h>
#include <Cg/cgGL.h>
#include <cstring>
#include "zerogsmath.h"
#include "ZeroGSShaders/zerogsshaders.h"
extern u32 s_stencilfunc, s_stencilref, s_stencilmask;
// Defines
@ -82,47 +76,10 @@ extern u32 s_stencilfunc, s_stencilref, s_stencilmask;
// global alpha blending settings
extern GLenum g_internalFloatFmt;
extern GLenum g_internalRGBAFloatFmt;
extern GLenum g_internalRGBAFloat16Fmt;
extern CGprogram g_vsprog, g_psprog;
extern CGparameter g_vparamPosXY[2], g_fparamFogColor;
extern const char* ShaderCallerName;
extern const char* ShaderHandleName;
extern const GLenum primtype[8];
// Stores Name in the global ShaderCallerName. The pointer is kept as-is (no
// copy is made), so the string must outlive any later use of the global.
inline void SetShaderCaller(const char* Name)
{
ShaderCallerName = Name;
}
// Stores Name in the global ShaderHandleName. As above, only the pointer is kept.
inline void SetHandleName(const char* Name)
{
ShaderHandleName = Name;
}
extern void HandleCgError(CGcontext ctx, CGerror err, void* appdata);
extern void ZZcgSetParameter4fv(CGparameter param, const float* v, const char* name);
// Bind a Cg vertex program only if it differs from the one cached in g_vsprog,
// then update the cache. `prog` is evaluated more than once, so pass an
// expression without side effects.
// The macro ends on the closing brace: there is deliberately no trailing line
// continuation, which would splice whatever follows into the macro body.
#define SETVERTEXSHADER(prog) { \
	if( (prog) != g_vsprog ) { \
		cgGLBindProgram(prog); \
		g_vsprog = (prog); \
	} \
}

// Pixel-program counterpart of SETVERTEXSHADER; the cache is g_psprog.
#define SETPIXELSHADER(prog) { \
	if( (prog) != g_psprog ) { \
		cgGLBindProgram(prog); \
		g_psprog = (prog); \
	} \
}
#define SAFE_RELEASE_PROG(x) { if( (x) != NULL ) { cgDestroyProgram(x); x = NULL; } }
#define SAFE_RELEASE_TEX(x) { if( (x) != 0 ) { glDeleteTextures(1, &(x)); x = 0; } }
// inline for an extremely often used sequence
@ -138,7 +95,6 @@ inline void DisableAllgl()
glColorMask(1, 1, 1, 1);
}
//--------------------- Dummies
#ifdef _WIN32
@ -152,105 +108,8 @@ extern void (APIENTRY *zgsBlendFuncSeparateEXT)(GLenum, GLenum, GLenum, GLenum);
// ------------------------ Types -------------------------
struct FRAGMENTSHADER
{
FRAGMENTSHADER() : prog(0), sMemory(0), sFinal(0), sBitwiseANDX(0), sBitwiseANDY(0), sInterlace(0), sCLUT(0), sOneColor(0), sBitBltZ(0),
fTexAlpha2(0), fTexOffset(0), fTexDims(0), fTexBlock(0), fClampExts(0), fTexWrapMode(0),
fRealTexDims(0), fTestBlack(0), fPageOffset(0), fTexAlpha(0) {}
CGprogram prog;
CGparameter sMemory, sFinal, sBitwiseANDX, sBitwiseANDY, sInterlace, sCLUT;
CGparameter sOneColor, sBitBltZ, sInvTexDims;
CGparameter fTexAlpha2, fTexOffset, fTexDims, fTexBlock, fClampExts, fTexWrapMode, fRealTexDims, fTestBlack, fPageOffset, fTexAlpha;
void set_uniform_param(CGparameter &var, const char *name)
{
CGparameter p;
p = cgGetNamedParameter(prog, name);
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) var = p;
}
bool set_texture(GLuint texobj, const char *name)
{
CGparameter p;
p = cgGetNamedParameter(prog, name);
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE)
{
cgGLSetTextureParameter(p, texobj);
cgGLEnableTextureParameter(p);
return true;
}
return false;
}
bool connect(CGparameter &tex, const char *name)
{
CGparameter p;
p = cgGetNamedParameter(prog, name);
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE)
{
cgConnectParameter(tex, p);
return true;
}
return false;
}
bool set_texture(CGparameter &tex, const char *name)
{
CGparameter p;
p = cgGetNamedParameter(prog, name);
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE)
{
//cgGLEnableTextureParameter(p);
tex = p;
return true;
}
return false;
}
bool set_shader_const(Vector v, const char *name)
{
CGparameter p;
p = cgGetNamedParameter(prog, name);
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE)
{
cgGLSetParameter4fv(p, v);
return true;
}
return false;
}
};
/////////////////////
// graphics resources
extern map<string, GLbyte> mapGLExtensions;
//extern map<int, SHADERHEADER*> mapShaderResources;
struct VERTEXSHADER
{
VERTEXSHADER() : prog(0), sBitBltPos(0), sBitBltTex(0) {}
CGprogram prog;
CGparameter sBitBltPos, sBitBltTex, fBitBltTrans; // vertex shader constants
};
extern CGprofile cgvProf, cgfProf;
extern CGprogram pvs[16];
extern FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS];
extern FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2];
extern GLenum s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha; // set by zgsBlendFuncSeparateEXT
// GL prototypes

View File

@ -21,7 +21,7 @@
#include "Util.h"
#include "GS.h"
#include "ZeroGSShaders/zerogsshaders.h"
#include "ZZoglShaders.h"
#include "Profile.h"
#include "GLWin.h"
@ -34,7 +34,6 @@ extern u32 THR_KeyEvent; // value for passing out key events between threads
extern bool THR_bShift, SaveStateExists;
const char* s_aa[5] = { "AA none |", "AA 2x |", "AA 4x |", "AA 8x |", "AA 16x |" };
const char* s_naa[3] = { "native res |", "res /2 |", "res /4 |" };
const char* pbilinear[] = { "off", "normal", "forced" };
@ -96,7 +95,6 @@ void ProcessAASetting(bool reverse)
// Flips the on-screen FPS display flag and records the change in the debug log.
void ProcessFPS()
{
	FUNCLOG

	extern bool g_bDisplayFPS;

	g_bDisplayFPS = !g_bDisplayFPS;
	ZZLog::Debug_Log("Toggled FPS.");
}
@ -112,31 +110,6 @@ void ProcessWireFrame()
ZZLog::WriteToScreen(strtitle);
}
// Steps conf.negaa through 0..2, forwards or (when reverse is set) backwards,
// applies the new level, shows it on screen and saves the configuration.
void ProcessNegAASetting(bool reverse)
{
	FUNCLOG

	char strtitle[256];

	if (reverse)
	{
		// negaa is a u8, so stepping below 0 wraps to 255; the range check
		// folds that (and any other out-of-range value) back to the top level.
		--conf.negaa;
		if (conf.negaa > 2) conf.negaa = 2;
	}
	else
	{
		// Stepping past the top level wraps back to 0.
		++conf.negaa;
		if (conf.negaa > 2) conf.negaa = 0;
	}

	sprintf(strtitle, "down resolution - %s", s_naa[conf.negaa]);
	ZeroGS::SetNegAA(conf.negaa);

	ZZLog::WriteToScreen(strtitle);
	SaveConfig();
}
typedef struct GameHackStruct
{
const char HackName[40];
@ -281,7 +254,7 @@ void WriteBilinear()
}
#ifdef _WIN32
void ProcessMessages()
void ProcessEvents()
{
MSG msg;
@ -349,7 +322,7 @@ void ProcessMessages()
#else // linux
void ProcessMessages()
void ProcessEvents()
{
FUNCLOG

View File

@ -34,21 +34,16 @@ bool IsLogging()
return (gsLog != NULL && conf.log);
}
bool Open()
void Open()
{
bool result = true;
const std::string LogFile(s_strLogPath + "GSzzogl.log");
gsLog = fopen(LogFile.c_str(), "w");
if (gsLog != NULL)
setvbuf(gsLog, NULL, _IONBF, 0);
else
{
SysMessage("Can't create log file %s\n", LogFile.c_str());
result = false;
}
return result;
}
void Close()
@ -64,9 +59,11 @@ void SetDir(const char* dir)
// Get the path to the log directory.
s_strLogPath = (dir==NULL) ? "logs/" : dir;
// Reload the log file after updated the path
Close();
Open();
// Reload previously open log file
if (gsLog) {
Close();
Open();
}
}
void WriteToScreen(const char* pstr, u32 ms)
@ -167,9 +164,11 @@ void Greg_Log(const char *fmt, ...)
va_start(list, fmt);
fprintf(gsLog, "GRegs: ");
if (IsLogging()) {
fprintf(gsLog, "GRegs: ");
vfprintf(gsLog, fmt, list);
}
//fprintf(stderr,"GRegs: ");
if (IsLogging()) vfprintf(gsLog, fmt, list);
//vfprintf(stderr, fmt, list);
va_end(list);

View File

@ -161,7 +161,6 @@ namespace ZeroGS
{
extern void AddMessage(const char* pstr, u32 ms);
extern void SetAA(int mode);
extern void SetNegAA(int mode);
extern bool Create(int width, int height);
extern void Destroy(bool bD3D);
extern void StartCapture();
@ -172,7 +171,7 @@ namespace ZZLog
{
extern bool IsLogging();
void SetDir(const char* dir);
extern bool Open();
extern void Open();
extern void Close();
extern void Message(const char *fmt, ...);
extern void Log(const char *fmt, ...);

View File

@ -23,6 +23,7 @@
//------------------ Includes
#include "ZZoglCRTC.h"
#include "GLWin.h"
#include "ZZoglShaders.h"
using namespace ZeroGS;
@ -176,7 +177,6 @@ inline void FrameObtainDispinfo(u32 bInterlace, tex0Info* dispinfo)
}
}
// Setup that has to be done before rendering the picture.
inline void RenderStartHelper(u32 bInterlace)
{
@ -190,6 +190,7 @@ inline void RenderStartHelper(u32 bInterlace)
// return;
// }
//#endif
if (conf.mrtdepth && pvs[8] == NULL)
{
conf.mrtdepth = 0;
@ -221,7 +222,7 @@ inline void RenderStartHelper(u32 bInterlace)
glClear(GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
}
SETVERTEXSHADER(pvsBitBlt.prog);
ZZshSetVertexShader(pvsBitBlt.prog);
glBindBuffer(GL_ARRAY_BUFFER, vboRect);
SET_STREAM();
@ -281,7 +282,7 @@ inline Vector RenderGetForClip(u32 bInterlace, int interlace, int psm, FRAGMENTS
valpha.w = 1;
}
ZZcgSetParameter4fv(prog->sOneColor, valpha, "g_fOneColor");
ZZshSetParameter4fv(prog->sOneColor, valpha, "g_fOneColor");
return valpha;
}
@ -294,8 +295,7 @@ inline void RenderCreateInterlaceTex(u32 bInterlace, int th, FRAGMENTSHADER* pro
int interlacetex = CreateInterlaceTex(2 * th);
cgGLSetTextureParameter(prog->sInterlace, interlacetex);
cgGLEnableTextureParameter(prog->sInterlace);
ZZshGLSetTextureParameter(prog->sInterlace, interlacetex, "Interlace");
}
// Well, do blending setup prior to second pass of half-frame drawing
@ -359,7 +359,7 @@ inline void RenderCRTC24helper(u32 bInterlace, int interlace, int psm)
SetShaderCaller("RenderCRTC24helper");
// assume that data is already in ptexMem (do Resolve?)
RenderGetForClip(bInterlace, interlace, psm, &ppsCRTC24[bInterlace]);
SETPIXELSHADER(ppsCRTC24[bInterlace].prog);
ZZshSetPixelShader(ppsCRTC24[bInterlace].prog);
DrawTriangleArray();
}
@ -416,7 +416,7 @@ inline Vector RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace)
v.w += 1.0f / (float)dh ;
}
ZZcgSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_fBitBltPos");
ZZshSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_fBitBltPos");
return v;
}
@ -440,7 +440,7 @@ inline Vector RenderSetTargetBitTex(float th, float tw, float dh, float dw, bool
v.w += 1.0f / conf.height;
}
ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
return v;
}
@ -451,7 +451,7 @@ inline Vector RenderSetTargetBitTrans(int th)
{
SetShaderCaller("RenderSetTargetBitTrans");
Vector v = Vector(float(th), -float(th), float(th), float(th));
ZZcgSetParameter4fv(pvsBitBlt.fBitBltTrans, v, "g_fBitBltTrans");
ZZshSetParameter4fv(pvsBitBlt.fBitBltTrans, v, "g_fBitBltTrans");
return v;
}
@ -469,7 +469,7 @@ inline Vector RenderSetTargetInvTex(int bInterlace, int tw, int th, FRAGMENTSHAD
v.y = 1.0f / (float)th;
v.z = (float)0.0;
v.w = -0.5f / (float)th;
ZZcgSetParameter4fv(prog->sInvTexDims, v, "g_fInvTexDims");
ZZshSetParameter4fv(prog->sInvTexDims, v, "g_fInvTexDims");
}
return v;
@ -554,11 +554,10 @@ inline void RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& list
Vector valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTCTarg[bInterlace]);
// inside vb[0]'s target area, so render that region only
cgGLSetTextureParameter(ppsCRTCTarg[bInterlace].sFinal, ptarg->ptex);
cgGLEnableTextureParameter(ppsCRTCTarg[bInterlace].sFinal);
ZZshGLSetTextureParameter(ppsCRTCTarg[bInterlace].sFinal, ptarg->ptex, "CRTC target");
RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTCTarg[bInterlace]);
SETPIXELSHADER(ppsCRTCTarg[bInterlace].prog);
ZZshSetPixelShader(ppsCRTCTarg[bInterlace].prog);
DrawTriangleArray();
@ -591,10 +590,7 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
}
// context has to be 0
CMemoryTarget* pmemtarg = g_MemTargs.GetMemoryTarget(texframe, 1);
if ((pmemtarg == NULL) || (bInterlace >= 2))
ZZLog::Error_Log("CRCR Check for memory shader fault.");
if (bInterlace >= 2) ZZLog::Error_Log("CRCR Check for memory shader fault.");
//if (!(*bUsingStencil)) RenderUpdateStencil(i, bUsingStencil);
@ -607,7 +603,7 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
h1 = texframe.th;
w2 = -0.5f;
h2 = -0.5f;
SetTexVariablesInt(0, 2, texframe, pmemtarg, &ppsCRTC[bInterlace], 1);
SetTexVariablesInt(0, 2, texframe, false, &ppsCRTC[bInterlace], 1);
}
else
{
@ -615,7 +611,7 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
h1 = 1;
w2 = -0.5f / (float)texframe.tw;
h2 = -0.5f / (float)texframe.th;
SetTexVariablesInt(0, 0, texframe, pmemtarg, &ppsCRTC[bInterlace], 1);
SetTexVariablesInt(0, 0, texframe, false, &ppsCRTC[bInterlace], 1);
}
if (g_bSaveFinalFrame) SaveTex(&texframe, g_bSaveFinalFrame - 1 > 0);
@ -630,12 +626,9 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
v = RenderSetTargetInvTex(bInterlace, texframe.tw, texframe.th, &ppsCRTC[bInterlace]);
Vector valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTC[bInterlace]);
cgGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, pmemtarg->ptex->tex);
cgGLEnableTextureParameter(ppsCRTC[bInterlace].sMemory);
ZZshGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, vb[0].pmemtarg->ptex->tex, "CRTC memory");
RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTC[bInterlace]);
SETPIXELSHADER(ppsCRTC[bInterlace].prog);
ZZshSetPixelShader(ppsCRTC[bInterlace].prog);
DrawTriangleArray();
}

View File

@ -25,7 +25,7 @@
#include "zerogs.h"
#include "GLWin.h"
#include "ZeroGSShaders/zerogsshaders.h"
#include "ZZoglShaders.h"
#include "targets.h"
// This include for windows resource file with Shaders
#ifdef _WIN32
@ -66,17 +66,6 @@
}
#define GL_BLEND_SET() zgsBlendFuncSeparateEXT(s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha)
#define GL_STENCILFUNC(func, ref, mask) { \
s_stencilfunc = func; \
s_stencilref = ref; \
s_stencilmask = mask; \
glStencilFunc(func, ref, mask); \
}
#define GL_STENCILFUNC_SET() glStencilFunc(s_stencilfunc, s_stencilref, s_stencilmask)
#define VB_BUFFERSIZE 0x400
#define VB_NUMBUFFERS 512
// ----------------- Types
@ -97,7 +86,6 @@ extern void KickDummy();
extern bool LoadEffects();
extern bool LoadExtraEffects();
extern FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
VERTEXSHADER pvsBitBlt;
GLuint vboRect = 0;
vector<GLuint> g_vboBuffers; // VBOs for all drawing commands
@ -139,10 +127,9 @@ void (APIENTRY *zgsBlendFuncSeparateEXT)(GLenum, GLenum, GLenum, GLenum) = NULL;
//------------------ variables
////////////////////////////
// State parameters
float fiRendWidth, fiRendHeight;
extern u8* s_lpShaderResources;
CGprogram pvs[16] = {NULL};
ZZshProgram pvs[16] = {NULL};
// Strings for the shader file in developer mode
#ifdef DEVBUILD
@ -167,10 +154,10 @@ int nLogoWidth, nLogoHeight;
u32 s_ptexInterlace = 0; // holds interlace fields
//------------------ Global Variables
int GPU_TEXWIDTH = 512;
float g_fiGPU_TEXWIDTH = 1/512.0f;
int g_MaxTexWidth = 4096, g_MaxTexHeight = 4096;
u32 s_uFramebuffer = 0;
CGprofile cgvProf, cgfProf;
int g_nPixelShaderVer = 0; // default
RasterFont* font_p = NULL;
float g_fBlockMult = 1;
@ -179,7 +166,6 @@ float g_fBlockMult = 1;
u32 ptexBlocks = 0, ptexConv16to32 = 0; // holds information on block tiling
u32 ptexBilinearBlocks = 0;
u32 ptexConv32to16 = 0;
bool g_bDisplayMsg = 1;
int g_nDepthBias = 0;
//u32 g_bSaveFlushedFrame = 0;
@ -190,13 +176,10 @@ bool ZeroGS::IsGLExt(const char* szTargetExtension)
return mapGLExtensions.find(string(szTargetExtension)) != mapGLExtensions.end();
}
inline bool
ZeroGS::Create_Window(int _width, int _height)
inline bool ZeroGS::Create_Window(int _width, int _height)
{
nBackbufferWidth = _width;
nBackbufferHeight = _height;
fiRendWidth = 1.0f / nBackbufferWidth;
fiRendHeight = 1.0f / nBackbufferHeight;
if (!GLWin.DisplayWindow(_width, _height)) return false;
@ -233,20 +216,9 @@ inline bool ZeroGS::CreateImportantCheck()
ZZLog::Error_Log("*********\nZZogl: OGL WARNING: Need GL_EXT_secondary_color\nZZogl: *********");
bSuccess = false;
}
// load the effect & find the best profiles (if any)
if (cgGLIsProfileSupported(CG_PROFILE_ARBVP1) != CG_TRUE)
{
ZZLog::Error_Log("arbvp1 not supported.");
bSuccess = false;
}
if (cgGLIsProfileSupported(CG_PROFILE_ARBFP1) != CG_TRUE)
{
ZZLog::Error_Log("arbfp1 not supported.");
bSuccess = false;
}
bSuccess &= ZZshCheckProfilesSupport();
return bSuccess;
}
@ -454,9 +426,6 @@ inline bool CreateFillExtensionsMap()
return true;
}
const static char* g_pShaders[] = { "full", "reduced", "accurate", "accurate-reduced" };
void LoadglFunctions()
{
GL_LOADFN(glIsRenderbufferEXT);
@ -478,6 +447,20 @@ void LoadglFunctions()
GL_LOADFN(glGenerateMipmapEXT);
}
// Try to upload the block data to the currently bound 2D texture using `fmt`
// as the internal format. The format is recorded in g_internalFloatFmt
// whether or not the upload works. Returns true when OpenGL reports no error
// afterwards.
inline bool TryBlockFormat(GLint fmt, const GLvoid* vBlockData)
{
	g_internalFloatFmt = fmt;

	glTexImage2D(GL_TEXTURE_2D, 0, g_internalFloatFmt,
				 BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0,
				 GL_ALPHA, GL_FLOAT, vBlockData);

	const GLenum uploadStatus = glGetError();
	return uploadStatus == GL_NO_ERROR;
}
// Try to upload the bilinear block data to the currently bound 2D texture
// using `fmt32` as the internal format. Both the 32-bit and the matching
// 16-bit float formats are recorded in the global format variables whether or
// not the upload works. Returns true when OpenGL reports no error afterwards.
inline bool TryBlinearFormat(GLint fmt32, GLint fmt16, const GLvoid* vBilinearData)
{
	g_internalRGBAFloatFmt = fmt32;
	g_internalRGBAFloat16Fmt = fmt16;

	glTexImage2D(GL_TEXTURE_2D, 0, g_internalRGBAFloatFmt,
				 BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0,
				 GL_RGBA, GL_FLOAT, vBilinearData);

	const GLenum uploadStatus = glGetError();
	return uploadStatus == GL_NO_ERROR;
}
bool ZeroGS::Create(int _width, int _height)
{
GLenum err = GL_NO_ERROR;
@ -487,7 +470,6 @@ bool ZeroGS::Create(int _width, int _height)
Destroy(1);
GSStateReset();
cgSetErrorHandler(HandleCgError, NULL);
g_RenderFormatType = RFT_float16;
if (!Create_Window(_width, _height)) return false;
@ -501,10 +483,10 @@ bool ZeroGS::Create(int _width, int _height)
// Limit the texture size supported to 8192. We do not need bigger texture.
// Besides the following assertion is false when texture are too big.
// ZZoglFlush.cpp:2349: assert(fblockstride >= 1.0f)
g_MaxTexWidth = min(8192, g_MaxTexWidth);
//g_MaxTexWidth = min(8192, g_MaxTexWidth);
g_MaxTexHeight = g_MaxTexWidth / 4;
GPU_TEXWIDTH = g_MaxTexWidth / 8;
GPU_TEXWIDTH = min (g_MaxTexWidth/8, 1024);
g_fiGPU_TEXWIDTH = 1.0f / GPU_TEXWIDTH;
if (!CreateOpenShadersFile()) return false;
@ -628,43 +610,32 @@ bool ZeroGS::Create(int _width, int _height)
glGenTextures(1, &ptexBlocks);
glBindTexture(GL_TEXTURE_2D, ptexBlocks);
g_internalFloatFmt = GL_RGBA32F; // This is OpenGL 3.0 standard format, so it should be implemented in new cards.
g_internalRGBAFloatFmt = GL_RGBA32F;
g_internalRGBAFloat16Fmt = GL_RGBA16F;
glTexImage2D(GL_TEXTURE_2D, 0, g_internalFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_ALPHA, GL_FLOAT, &vBlockData[0]);
if (glGetError() != GL_NO_ERROR)
{
// try different internal format
g_internalFloatFmt = GL_ALPHA_FLOAT32_ATI;
glTexImage2D(GL_TEXTURE_2D, 0, g_internalFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_ALPHA, GL_FLOAT, &vBlockData[0]);
if (TryBlockFormat(GL_RGBA32F, &vBlockData[0]))
ZZLog::Error_Log("Use GL_RGBA32F for blockdata.");
else if (TryBlockFormat(GL_ALPHA_FLOAT32_ATI, &vBlockData[0]))
ZZLog::Error_Log("Use ATI_texture_float for blockdata.");
else if (TryBlockFormat(GL_ALPHA32F_ARB, &vBlockData[0]))
ZZLog::Error_Log("Use ARB_texture_float for blockdata.");
else
{ // This case is bad. But for really old cards it could be nice.
g_fBlockMult = 65535.0f*(float)g_fiGPU_TEXWIDTH;
BLOCK::FillBlocks(vBlockData, vBilinearData, 0);
g_internalFloatFmt = GL_ALPHA16;
if (glGetError() != GL_NO_ERROR)
{
// This case is bad. But for really old cards it could be nice.
g_fBlockMult = 65535.0f*(float)g_fiGPU_TEXWIDTH ;
BLOCK::FillBlocks(vBlockData, vBilinearData, 0);
g_internalFloatFmt = GL_ALPHA16 ;
// We store block data on u16 rather float numbers. It's not as precise, but ALPHA16 is OpenGL 2.0 standard
// and uses only 16 bit. Old zerogs use red channel, but it does not work.
glTexImage2D(GL_TEXTURE_2D, 0, g_internalFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_ALPHA, GL_UNSIGNED_SHORT, &vBlockData[0]);
if (glGetError() != GL_NO_ERROR)
{
ZZLog::Error_Log("Could not fill blocks.");
return false;
}
do_not_use_billinear = true;
ZZLog::Debug_Log("Using non-bilinear fill, quallity is outdated!");
}
else
ZZLog::Debug_Log("Use ATI_texture_float for blockdata.");
// We store block data on u16 rather float numbers. It's not so preciese, but ALPHA16 is OpenGL 2.0 standart
// and use only 16 bit. Old zerogs use red channel, but it does not work.
glTexImage2D(GL_TEXTURE_2D, 0, g_internalFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_ALPHA, GL_UNSIGNED_SHORT, &vBlockData[0]);
if( glGetError() != GL_NO_ERROR ) {
ZZLog::Error_Log("ZZogl ERROR: could not fill blocks");
return false;
}
do_not_use_billinear = true;
conf.bilinear = 0;
ZZLog::Error_Log("Using non-bilinear fill, quallity is outdated!");
}
else
ZZLog::Debug_Log("Use GL_RGBA32F for blockdata.");
setTex2DFilters(GL_NEAREST);
setTex2DWrap(GL_REPEAT);
@ -674,33 +645,15 @@ bool ZeroGS::Create(int _width, int _height)
// fill in the bilinear blocks (main variant).
glGenTextures(1, &ptexBilinearBlocks);
glBindTexture(GL_TEXTURE_2D, ptexBilinearBlocks);
glTexImage2D(GL_TEXTURE_2D, 0, g_internalRGBAFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_RGBA, GL_FLOAT, &vBilinearData[0]);
if (glGetError() != GL_NO_ERROR)
{
g_internalRGBAFloatFmt = GL_RGBA_FLOAT32_ATI;
g_internalRGBAFloat16Fmt = GL_RGBA_FLOAT16_ATI;
glTexImage2D(GL_TEXTURE_2D, 0, g_internalRGBAFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_RGBA, GL_FLOAT, &vBilinearData[0]);
if (glGetError() != GL_NO_ERROR)
{
g_internalRGBAFloatFmt = GL_FLOAT_RGBA32_NV;
g_internalRGBAFloat16Fmt = GL_FLOAT_RGBA16_NV;
glTexImage2D(GL_TEXTURE_2D, 0, g_internalRGBAFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_RGBA, GL_FLOAT, &vBilinearData[0]);
if (glGetError() != GL_NO_ERROR)
{
ZZLog::Error_Log("Fill bilinear blocks failed!");
return false;
}
else
ZZLog::Debug_Log("Fill bilinear blocks with NVidia_float.");
}
else
ZZLog::Debug_Log("Fill bilinear blocks with ATI_texture_float.");
}
else
ZZLog::Debug_Log("Fill bilinear blocks OK.!");
if (TryBlinearFormat(GL_RGBA32F, GL_RGBA16F, &vBilinearData[0]))
ZZLog::Error_Log("Fill bilinear blocks OK.!");
else if (TryBlinearFormat(GL_RGBA_FLOAT32_ATI, GL_RGBA_FLOAT16_ATI, &vBilinearData[0]))
ZZLog::Error_Log("Fill bilinear blocks with ATI_texture_float.");
else if (TryBlinearFormat(GL_FLOAT_RGBA32_NV, GL_FLOAT_RGBA16_NV, &vBilinearData[0]))
ZZLog::Error_Log("ZZogl Fill bilinear blocks with NVidia_float.");
else
ZZLog::Error_Log("Fill bilinear blocks failed.");
setTex2DFilters(GL_NEAREST);
setTex2DWrap(GL_REPEAT);
@ -814,72 +767,7 @@ bool ZeroGS::Create(int _width, int _height)
if (err != GL_NO_ERROR) bSuccess = false;
g_cgcontext = cgCreateContext();
cgvProf = CG_PROFILE_ARBVP1;
cgfProf = CG_PROFILE_ARBFP1;
cgGLEnableProfile(cgvProf);
cgGLEnableProfile(cgfProf);
cgGLSetOptimalOptions(cgvProf);
cgGLSetOptimalOptions(cgfProf);
cgGLSetManageTextureParameters(g_cgcontext, CG_FALSE);
//cgSetAutoCompile(g_cgcontext, CG_COMPILE_IMMEDIATE);
g_fparamFogColor = cgCreateParameter(g_cgcontext, CG_FLOAT4);
g_vparamPosXY[0] = cgCreateParameter(g_cgcontext, CG_FLOAT4);
g_vparamPosXY[1] = cgCreateParameter(g_cgcontext, CG_FLOAT4);
ZZLog::GS_Log("Creating effects.");
B_G(LoadEffects(), return false);
g_bDisplayMsg = 0;
// create a sample shader
clampInfo temp;
memset(&temp, 0, sizeof(temp));
temp.wms = 3;
temp.wmt = 3;
g_nPixelShaderVer = 0;//SHADER_ACCURATE;
// test
bool bFailed;
FRAGMENTSHADER* pfrag = LoadShadeEffect(0, 1, 1, 1, 1, temp, 0, &bFailed);
if (bFailed || pfrag == NULL)
{
g_nPixelShaderVer = SHADER_ACCURATE | SHADER_REDUCED;
pfrag = LoadShadeEffect(0, 0, 1, 1, 0, temp, 0, &bFailed);
if (pfrag != NULL)
cgGLLoadProgram(pfrag->prog);
if (bFailed || pfrag == NULL || cgGetError() != CG_NO_ERROR)
{
g_nPixelShaderVer = SHADER_REDUCED;
ZZLog::Error_Log("Basic shader test failed.");
}
}
g_bDisplayMsg = 1;
if (g_nPixelShaderVer & SHADER_REDUCED) conf.bilinear = 0;
ZZLog::GS_Log("Creating extra effects.");
B_G(LoadExtraEffects(), return false);
ZZLog::GS_Log("Using %s shaders.", g_pShaders[g_nPixelShaderVer]);
if (!ZZshStartUsingShaders()) bSuccess = false;
GL_REPORT_ERROR();

View File

@ -26,6 +26,7 @@
#include "zerogs.h"
#include "targets.h"
#include "ZZoglFlushHack.h"
#include "ZZoglShaders.h"
using namespace ZeroGS;
@ -119,14 +120,11 @@ void Draw(const VB& curvb)
//------------------ variables
extern int g_nDepthBias;
extern float g_fBlockMult;
extern float g_fBlockMult; // used for old cards, that do not support Alpha-32float textures. We store block data in u16 and use it.
bool g_bUpdateStencil = 1;
//u32 g_SaveFrameNum = 0; // ZZ
int GPU_TEXWIDTH = 512;
float g_fiGPU_TEXWIDTH = 1 / 512.0f;
extern CGprogram g_psprog; // 2 -- ZZ
extern ZZshProgram g_psprog; // 2 -- ZZ
// local alpha blending settings
static GLenum s_rgbeq, s_alphaeq; // set by zgsBlendEquationSeparateEXT // ZZ
@ -201,8 +199,8 @@ namespace ZeroGS
VB vb[2];
float fiTexWidth[2], fiTexHeight[2]; // current tex width and height
u8 s_AAx = 0, s_AAy = 0; // if AAy is set, then AAx has to be set
u8 s_AAz = 0, s_AAw = 0; // if AAy is set, then AAx has to be set
//u8 s_AAx = 0, s_AAy = 0; // if AAy is set, then AAx has to be set
Point AA = {0,0};
int icurctx = -1;
@ -219,11 +217,11 @@ void ResetAlphaVariables();
inline void SetAlphaTestInt(pixTest curtest);
inline void RenderAlphaTest(const VB& curvb, CGparameter sOneColor);
inline void RenderAlphaTest(const VB& curvb, ZZshParameter sOneColor);
inline void RenderStencil(const VB& curvb, u32 dwUsingSpecialTesting);
inline void ProcessStencil(const VB& curvb);
inline void RenderFBA(const VB& curvb, CGparameter sOneColor);
inline void ProcessFBA(const VB& curvb, CGparameter sOneColor); // zz
inline void RenderFBA(const VB& curvb, ZZshParameter sOneColor);
inline void ProcessFBA(const VB& curvb, ZZshParameter sOneColor); // zz
}
@ -249,6 +247,14 @@ inline void SetAlphaTest(const pixTest& curtest)
glAlphaFunc(g_dwAlphaCmp[curtest.atst], AlphaReferedValue(curtest.aref));
}
}
// Tells whether alpha expansion applies to the texture described by tex0.
// All three conditions must hold:
//   - tcc is set (with tcc == 0 no texture alpha is used),
//   - gs.texa.aem is set (aem drives alpha expanding; the original author was
//     not sure this check is correct),
//   - the texture's pixel storage format passes PSMT_ALPHAEXP.
inline bool IsAlphaTestExpansion(tex0Info tex0)
{
	if (!tex0.tcc) return false;
	if (!gs.texa.aem) return false;

	const bool expandableFormat = PSMT_ALPHAEXP(PIXEL_STORAGE_FORMAT(tex0));
	return expandableFormat;
}
// Switch wireframe rendering off for first flush, so it's draw few solid primitives
inline void SwitchWireframeOff()
@ -853,7 +859,7 @@ inline Vector FlushSetPageOffset(FRAGMENTSHADER* pfragment, int shadertype, CRen
// zoe2
if (PSMT_ISZTEX(ptextarg->psm)) vpageoffset.w = -1.0f;
ZZcgSetParameter4fv(pfragment->fPageOffset, vpageoffset, "g_fPageOffset");
ZZshSetParameter4fv(pfragment->fPageOffset, vpageoffset, "g_fPageOffset");
return vpageoffset;
}
@ -871,7 +877,7 @@ inline Vector FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& c
v.y = 16.0f / (float)curvb.tex0.th;
v.z = 0.5f * v.x;
v.w = 0.5f * v.y;
ZZcgSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset");
ZZshSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset");
}
else if (shadertype == 4)
{
@ -880,7 +886,7 @@ inline Vector FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& c
v.y = 16.0f / (float)ptextarg->fbh;
v.z = -1;
v.w = 8.0f / (float)ptextarg->fbh;
ZZcgSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset");
ZZshSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset");
}
return v;
@ -914,7 +920,7 @@ inline Vector FlushTextureDims(FRAGMENTSHADER* pfragment, int shadertype, VB& cu
if (shadertype == 4)
vTexDims.z += 8.0f;
ZZcgSetParameter4fv(pfragment->fTexDims, vTexDims, "g_fTexDims");
ZZshSetParameter4fv(pfragment->fTexDims, vTexDims, "g_fTexDims");
return vTexDims;
}
@ -951,11 +957,11 @@ inline FRAGMENTSHADER* FlushUseExistRenderTarget(VB& curvb, CRenderTarget* ptext
GLuint ptexclut = 0;
//int psm = GetTexCPSM(curvb.tex0);
//int psm = PIXEL_STORAGE_FORMAT(curvb.tex0);
int shadertype = FlushGetShaderType(curvb, ptextarg, ptexclut);
FRAGMENTSHADER* pfragment = LoadShadeEffect(shadertype, 0, curvb.curprim.fge,
IsAlphaTestExpansion(curvb), exactcolor, curvb.clamp, context, NULL);
IsAlphaTestExpansion(curvb.tex0), exactcolor, curvb.clamp, context, NULL);
Vector vpageoffset = FlushSetPageOffset(pfragment, shadertype, ptextarg);
@ -964,10 +970,7 @@ inline FRAGMENTSHADER* FlushUseExistRenderTarget(VB& curvb, CRenderTarget* ptext
Vector vTexDims = FlushTextureDims(pfragment, shadertype, curvb, ptextarg);
if (pfragment->sCLUT != NULL && ptexclut != 0)
{
cgGLSetTextureParameter(pfragment->sCLUT, ptexclut);
cgGLEnableTextureParameter(pfragment->sCLUT);
}
ZZshGLSetTextureParameter(pfragment->sCLUT, ptexclut, "CLUT");
FlushApplyResizeFilter(curvb, dwFilterOpts, ptextarg, context);
@ -997,7 +1000,7 @@ inline FRAGMENTSHADER* FlushMadeNewTarget(VB& curvb, int exactcolor, int context
}
FRAGMENTSHADER* pfragment = LoadShadeEffect(0, GetTexFilter(curvb.tex1), curvb.curprim.fge,
IsAlphaTestExpansion(curvb), exactcolor, curvb.clamp, context, NULL);
IsAlphaTestExpansion(curvb.tex0), exactcolor, curvb.clamp, context, NULL);
if (pfragment == NULL)
ZZLog::Error_Log("Could not find memory target shader.");
@ -1011,35 +1014,25 @@ inline void FlushSetTexture(VB& curvb, FRAGMENTSHADER* pfragment, CRenderTarget*
SetTexVariables(context, pfragment);
SetTexInt(context, pfragment, ptextarg == NULL);
// have to enable the texture parameters(curtest.atst=
// have to enable the texture parameters(curtest.atst)
if( curvb.ptexClamp[0] != 0 )
ZZshGLSetTextureParameter(pfragment->sBitwiseANDX, curvb.ptexClamp[0], "Clamp 0");
if( curvb.ptexClamp[1] != 0 )
ZZshGLSetTextureParameter(pfragment->sBitwiseANDY, curvb.ptexClamp[1], "Clamp 1");
if( pfragment->sMemory != NULL && s_ptexCurSet[context] != 0)
ZZshGLSetTextureParameter(pfragment->sMemory, s_ptexCurSet[context], "Clamp memory");
if (curvb.ptexClamp[0] != 0)
{
cgGLSetTextureParameter(pfragment->sBitwiseANDX, curvb.ptexClamp[0]);
cgGLEnableTextureParameter(pfragment->sBitwiseANDX);
}
if (curvb.ptexClamp[1] != 0)
{
cgGLSetTextureParameter(pfragment->sBitwiseANDY, curvb.ptexClamp[1]);
cgGLEnableTextureParameter(pfragment->sBitwiseANDY);
}
if (pfragment->sMemory != NULL && s_ptexCurSet[context] != 0)
{
cgGLSetTextureParameter(pfragment->sMemory, s_ptexCurSet[context]);
cgGLEnableTextureParameter(pfragment->sMemory);
}
}
// Reset programm and texture variables;
// Reset program and texture variables;
inline void FlushBindProgramm(FRAGMENTSHADER* pfragment, int context)
{
vb[context].bTexConstsSync = 0;
vb[context].bVarsTexSync = 0;
cgGLBindProgram(pfragment->prog);
g_psprog = pfragment->prog;
ZZshSetPixelShader(pfragment->prog);
}
inline FRAGMENTSHADER* FlushRendererStage(VB& curvb, u32& dwFilterOpts, CRenderTarget* ptextarg, int exactcolor, int context)
@ -1072,8 +1065,8 @@ inline FRAGMENTSHADER* FlushRendererStage(VB& curvb, u32& dwFilterOpts, CRenderT
GL_REPORT_ERRORD();
// set the shaders
SetShaderCaller("FlushRendererStage") ;
SETVERTEXSHADER(pvs[2 * ((curvb.curprim._val >> 1) & 3) + 8 * s_bWriteDepth + context]);
SetShaderCaller("FlushRendererStage");
ZZshSetVertexShader(pvs[2 * ((curvb.curprim._val >> 1) & 3) + 8 * s_bWriteDepth + context]);
FlushBindProgramm(pfragment, context);
GL_REPORT_ERRORD();
@ -1116,9 +1109,6 @@ inline void AlphaSetDepthTest(VB& curvb, const pixTest curtest, FRAGMENTSHADER*
GL_ZTEST(curtest.zte);
// glEnable (GL_POLYGON_OFFSET_FILL);
// glPolygonOffset (-1., -1.);
if (s_bWriteDepth)
{
if (!curvb.zbuf.zmsk)
@ -1180,13 +1170,13 @@ inline u32 AlphaRenderAlpha(VB& curvb, const pixTest curtest, FRAGMENTSHADER* pf
v.w *= 255;
}
ZZcgSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
}
else
{
// not using blending so set to defaults
Vector v = exactcolor ? Vector(1, 510 * 255.0f / 256.0f, 0, 0) : Vector(1, 2 * 255.0f / 256.0f, 0, 0);
ZZcgSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
}
@ -1277,7 +1267,7 @@ inline void AlphaPabe(VB& curvb, FRAGMENTSHADER* pfragment, int exactcolor)
if (exactcolor) v.y *= 255;
ZZcgSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
Draw(curvb);
@ -1346,7 +1336,7 @@ inline void AlphaFailureTestJob(VB& curvb, const pixTest curtest, FRAGMENTSHADE
if (exactcolor) { v.y *= 255; v.w *= 255; }
ZZcgSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
glEnable(GL_BLEND);
GL_STENCILFUNC(GL_EQUAL, s_stencilref | STENCIL_FBA, s_stencilmask | STENCIL_FBA);
@ -1370,7 +1360,7 @@ inline void AlphaFailureTestJob(VB& curvb, const pixTest curtest, FRAGMENTSHADE
if (exactcolor) v.y *= 255;
ZZcgSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
Draw(curvb);
@ -1422,7 +1412,7 @@ inline void AlphaSpecialTesting(VB& curvb, FRAGMENTSHADER* pfragment, u32 dwUsin
glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
Vector v = Vector(0, exactcolor ? 510.0f : 2.0f, 0, 0);
ZZcgSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
Draw(curvb);
// don't need to restore
@ -1469,12 +1459,11 @@ inline void AlphaSaveTarget(VB& curvb)
//#endif
// char str[255];
// sprintf(str, "frames/frame%.4d.tga", g_SaveFrameNum++);
// if( (g_bSaveFlushedFrame & 2) ) {
// //glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, 0 ); // switch to the backbuffer
// //glFlush();
// //SaveTexture("tex.jpg", GL_TEXTURE_RECTANGLE_NV, curvb.prndr->ptex, RW(curvb.prndr->fbw), RH(curvb.prndr->fbh));
// SaveRenderTarget(str, RW(curvb.prndr->fbw), RH(curvb.prndr->fbh), 0);
// }
// //glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, 0 ); // switch to the backbuffer
// //glFlush();
// //SaveTexture("tex.jpg", GL_TEXTURE_RECTANGLE_NV, curvb.prndr->ptex, RW(curvb.prndr->fbw), RH(curvb.prndr->fbh));
// SaveRenderTarget(str, RW(curvb.prndr->fbw), RH(curvb.prndr->fbh), 0);
// }
#endif
}
@ -1500,7 +1489,7 @@ inline void AlphaColorClamping(VB& curvb, const pixTest curtest)
SetShaderCaller("AlphaColorClamping");
SETPIXELSHADER(ppsOne.prog);
ZZshSetPixelShader(ppsOne.prog);
GL_BLEND_RGB(GL_ONE, GL_ONE);
float f;
@ -1508,7 +1497,7 @@ inline void AlphaColorClamping(VB& curvb, const pixTest curtest)
if (bAlphaClamping & 1) // min
{
f = 0;
ZZcgSetParameter4fv(ppsOne.sOneColor, &f, "g_fOneColor");
ZZshSetParameter4fv(ppsOne.sOneColor, &f, "g_fOneColor");
GL_BLENDEQ_RGB(GL_MAX_EXT);
Draw(curvb);
}
@ -1517,7 +1506,7 @@ inline void AlphaColorClamping(VB& curvb, const pixTest curtest)
if (bAlphaClamping & 2) // max
{
f = 1;
ZZcgSetParameter4fv(ppsOne.sOneColor, &f, "g_fOneColor");
ZZshSetParameter4fv(ppsOne.sOneColor, &f, "g_fOneColor");
GL_BLENDEQ_RGB(GL_MIN_EXT);
Draw(curvb);
}
@ -1615,7 +1604,7 @@ void ZeroGS::FlushBoth()
Flush(1);
}
inline void ZeroGS::RenderFBA(const VB& curvb, CGparameter sOneColor)
inline void ZeroGS::RenderFBA(const VB& curvb, ZZshParameter sOneColor)
{
// add fba to all pixels
GL_STENCILFUNC(GL_ALWAYS, STENCIL_FBA, 0xff);
@ -1636,7 +1625,7 @@ inline void ZeroGS::RenderFBA(const VB& curvb, CGparameter sOneColor)
Vector v(1,2,0,0);
ZZcgSetParameter4fv(sOneColor, v, "g_fOneColor");
ZZshSetParameter4fv(sOneColor, v, "g_fOneColor");
Draw(curvb);
@ -1659,7 +1648,7 @@ inline void ZeroGS::RenderFBA(const VB& curvb, CGparameter sOneColor)
GL_ZTEST(curvb.test.zte);
}
__forceinline void ZeroGS::RenderAlphaTest(const VB& curvb, CGparameter sOneColor)
__forceinline void ZeroGS::RenderAlphaTest(const VB& curvb, ZZshParameter sOneColor)
{
if (!g_bUpdateStencil) return;
@ -1675,7 +1664,7 @@ __forceinline void ZeroGS::RenderAlphaTest(const VB& curvb, CGparameter sOneColo
Vector v(1,2,0,0);
ZZcgSetParameter4fv(sOneColor, v, "g_fOneColor");
ZZshSetParameter4fv(sOneColor, v, "g_fOneColor");
// or a 1 to the stencil buffer wherever alpha passes
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
@ -1699,7 +1688,7 @@ __forceinline void ZeroGS::RenderAlphaTest(const VB& curvb, CGparameter sOneColo
if (curvb.test.ate && curvb.test.atst > 1 && curvb.test.aref > 0x80)
{
v = Vector(1,1,0,0);
ZZcgSetParameter4fv(sOneColor, v, "g_fOneColor");
ZZshSetParameter4fv(sOneColor, v, "g_fOneColor");
glAlphaFunc(g_dwAlphaCmp[curvb.test.atst], AlphaReferedValue(curvb.test.aref));
}
@ -1763,7 +1752,7 @@ inline void ZeroGS::ProcessStencil(const VB& curvb)
SetShaderCaller("ProcessStencil");
SETPIXELSHADER(ppsOne.prog);
ZZshSetPixelShader(ppsOne.prog);
Draw(curvb);
// process when alpha >= 0xff
@ -1797,7 +1786,7 @@ inline void ZeroGS::ProcessStencil(const VB& curvb)
glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
}
__forceinline void ZeroGS::ProcessFBA(const VB& curvb, CGparameter sOneColor)
__forceinline void ZeroGS::ProcessFBA(const VB& curvb, ZZshParameter sOneColor)
{
if ((curvb.frame.fbm&0x80000000)) return;
@ -1823,8 +1812,8 @@ __forceinline void ZeroGS::ProcessFBA(const VB& curvb, CGparameter sOneColor)
GL_BLENDEQ_ALPHA(GL_FUNC_ADD);
float f = 1;
ZZcgSetParameter4fv(sOneColor, &f, "g_fOneColor");
SETPIXELSHADER(ppsOne.prog);
ZZshSetParameter4fv(sOneColor, &f, "g_fOneColor");
ZZshSetPixelShader(ppsOne.prog);
Draw(curvb);
glDisable(GL_ALPHA_TEST);
@ -1980,13 +1969,13 @@ void ZeroGS::SetTexInt(int context, FRAGMENTSHADER* pfragment, int settexint)
{
if (vb[context].pmemtarg != pmemtarg)
{
SetTexVariablesInt(context, GetTexFilter(vb[context].tex1), tex0, pmemtarg, pfragment, s_bForceTexFlush);
SetTexVariablesInt(context, GetTexFilter(vb[context].tex1), tex0, true, pfragment, s_bForceTexFlush);
vb[context].bVarsTexSync = true;
}
}
else
{
SetTexVariablesInt(context, GetTexFilter(vb[context].tex1), tex0, pmemtarg, pfragment, s_bForceTexFlush);
SetTexVariablesInt(context, GetTexFilter(vb[context].tex1), tex0, false, pfragment, s_bForceTexFlush);
vb[context].bVarsTexSync = true;
INC_TEXVARS();
@ -2081,10 +2070,10 @@ void ZeroGS::SetTexClamping(int context, FRAGMENTSHADER* pfragment)
}
if (pfragment->fTexWrapMode != 0)
ZZcgSetParameter4fv(pfragment->fTexWrapMode, v, "g_fTexWrapMode");
ZZshSetParameter4fv(pfragment->fTexWrapMode, v, "g_fTexWrapMode");
if (pfragment->fClampExts != 0)
ZZcgSetParameter4fv(pfragment->fClampExts, v2, "g_fClampExts");
ZZshSetParameter4fv(pfragment->fClampExts, v2, "g_fClampExts");
}
@ -2122,9 +2111,9 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment)
Vector valpha, valpha2 ;
// if clut, use the frame format
int psm = GetTexCPSM(tex0);
int psm = PIXEL_STORAGE_FORMAT(tex0);
// printf ( "A %d psm, is-clut %d. cpsm %d | %d %d\n", psm, PSMT_ISCLUT(psm), tex0.cpsm, tex0.tfx, tex0.tcc );
// ZZLog::Error_Log( "A %d psm, is-clut %d. cpsm %d | %d %d", psm, PSMT_ISCLUT(psm), tex0.cpsm, tex0.tfx, tex0.tcc );
Vector vblack;
vblack.x = vblack.y = vblack.z = vblack.w = 10;
@ -2149,7 +2138,7 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment)
valpha2.z = (tex0.tfx != 1) * 2 ;
valpha2.w = (tex0.tfx == 0) ;
if (tex0.tcc == 0 || !nNeedAlpha(psm))
if (tex0.tcc == 0 || !PSMT_ALPHAEXP(psm))
{
valpha.x = 0 ;
valpha.y = (!!tex0.tcc) * (1 + (tex0.tfx == 0)) ;
@ -2157,7 +2146,8 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment)
else
{
valpha.x = (gs.texa.fta[0]) * (1 + (tex0.tfx == 0)) ;
valpha.y = (gs.texa.fta[psm!=1] - gs.texa.fta[0]) * (1 + (tex0.tfx == 0)) ;
valpha.y = (gs.texa.fta[psm != PSMCT24] - gs.texa.fta[0]) * (1 + (tex0.tfx == 0)) ;
}
valpha.z = (tex0.tfx >= 3) ;
@ -2206,7 +2196,7 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment)
valpha4.z = 0; valpha4.w = 0;
}
if( nNeedAlpha(psm) ) {
if( PSMT_ALPHAEXP(psm) ) {
if( tex0.tfx == 0 ) {
// make sure alpha is mult by two when the output is Cv = Ct*Cf
@ -2241,26 +2231,26 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment)
}
if ( equal_vectors(valpha, valpha3) && equal_vectors(valpha2, valpha4) ) {
if (CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][nNeedAlpha(psm)] == 0) {
printf ( "Good issue %d %d %d %d\n", tex0.tfx, tex0.tcc, psm, nNeedAlpha(psm) );
CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][nNeedAlpha(psm) ] = 1;
if (CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][PSMT_ALPHAEXP(psm)] == 0) {
printf ( "Good issue %d %d %d %d\n", tex0.tfx, tex0.tcc, psm, PSMT_ALPHAEXP(psm) );
CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][PSMT_ALPHAEXP(psm) ] = 1;
}
}
else if (CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][nNeedAlpha(psm)] == -1) {
else if (CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][PSMT_ALPHAEXP(psm)] == -1) {
printf ("Bad array, %d %d %d %d\n\tolf valpha %f, %f, %f, %f : valpha2 %f %f %f %f\n\tnew valpha %f, %f, %f, %f : valpha2 %f %f %f %f\n",
tex0.tfx, tex0.tcc, psm, nNeedAlpha(psm),
tex0.tfx, tex0.tcc, psm, PSMT_ALPHAEXP(psm),
valpha3.x, valpha3.y, valpha3.z, valpha3.w, valpha4.x, valpha4.y, valpha4.z, valpha4.w,
valpha.x, valpha.y, valpha.z, valpha.w, valpha2.x, valpha2.y, valpha2.z, valpha2.w);
CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][nNeedAlpha(psm)] = -1 ;
CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][PSMT_ALPHAEXP(psm)] = -1 ;
}
// Test;*/
ZZcgSetParameter4fv(pfragment->fTexAlpha, valpha, "g_fTexAlpha");
ZZcgSetParameter4fv(pfragment->fTexAlpha2, valpha2, "g_fTexAlpha2");
ZZshSetParameter4fv(pfragment->fTexAlpha, valpha, "g_fTexAlpha");
ZZshSetParameter4fv(pfragment->fTexAlpha2, valpha2, "g_fTexAlpha2");
if (tex0.tcc && gs.texa.aem && nNeedAlpha(psm))
ZZcgSetParameter4fv(pfragment->fTestBlack, vblack, "g_fTestBlack");
if (IsAlphaTestExpansion(tex0))
ZZshSetParameter4fv(pfragment->fTestBlack, vblack, "g_fTestBlack");
SetTexClamping(context, pfragment);
@ -2276,17 +2266,20 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment)
}
}
void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, CMemoryTarget* pmemtarg, FRAGMENTSHADER* pfragment, int force)
void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force)
{
FUNCLOG
Vector v;
assert(pmemtarg != NULL && pfragment != NULL && pmemtarg->ptex != NULL);
CMemoryTarget* pmemtarg = g_MemTargs.GetMemoryTarget(tex0, 1);
assert( pmemtarg != NULL && pfragment != NULL && pmemtarg->ptex != NULL);
if (pmemtarg == NULL || pfragment == NULL || pmemtarg->ptex == NULL)
{
printf("SetTexVariablesInt error\n");
ZZLog::Error_Log("SetTexVariablesInt error.");
return;
}
if (CheckVB && vb[context].pmemtarg == pmemtarg) return;
SetShaderCaller("SetTexVariablesInt");
@ -2303,9 +2296,9 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0,
v.w = 1.0f / (float)fh;
if (pfragment->fRealTexDims)
ZZcgSetParameter4fv(pfragment->fRealTexDims, v, "g_fRealTexDims");
ZZshSetParameter4fv(pfragment->fRealTexDims, v, "g_fRealTexDims");
else
ZZcgSetParameter4fv(cgGetNamedParameter(pfragment->prog, "g_fRealTexDims"), v, "g_fRealTexDims");
ZZshSetParameter4fv(cgGetNamedParameter(pfragment->prog,"g_fRealTexDims"),v, "g_fRealTexDims");
}
if (m_Blocks[tex0.psm].bpp == 0)
@ -2359,11 +2352,11 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0,
v.z *= b.bpp * (1 / 32.0f);
}
ZZcgSetParameter4fv(pfragment->fTexDims, vTexDims, "g_fTexDims");
ZZshSetParameter4fv(pfragment->fTexDims, vTexDims, "g_fTexDims");
// ZZcgSetParameter4fv(pfragment->fTexBlock, b.vTexBlock, "g_fTexBlock"); // I change it, and it's working. Seems casting from Vector to float[4] is ok.
ZZcgSetParameter4fv(pfragment->fTexBlock, &b.vTexBlock.x, "g_fTexBlock");
ZZcgSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset");
// ZZshSetParameter4fv(pfragment->fTexBlock, b.vTexBlock, "g_fTexBlock"); // I change it, and it's working. Seems casting from Vector to float[4] is ok.
ZZshSetParameter4fv(pfragment->fTexBlock, &b.vTexBlock.x, "g_fTexBlock");
ZZshSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset");
// get hardware texture dims
//int texheight = (pmemtarg->realheight+pmemtarg->widthmult-1)/pmemtarg->widthmult;
@ -2383,7 +2376,7 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0,
v.w = 0.5f;*/
v.w = 0.5f;
ZZcgSetParameter4fv(pfragment->fPageOffset, v, "g_fPageOffset");
ZZshSetParameter4fv(pfragment->fPageOffset, v, "g_fPageOffset");
if (force)
s_ptexCurSet[context] = pmemtarg->ptex->tex;

View File

@ -21,11 +21,44 @@
//------------------- Includes
#include "zerogs.h"
#include "ZeroGSShaders/zerogsshaders.h"
#include "ZZoglShaders.h"
#include "zpipe.h"
// ----------------- Defines
#define TEXWRAP_REPEAT 0
#define TEXWRAP_CLAMP 1
#define TEXWRAP_REGION_REPEAT 2
#define TEXWRAP_REPEAT_CLAMP 3
#define SH_WRITEDEPTH 0x2000 // depth is written
#define SH_CONTEXT1 0x1000 // context1 is used
#define SH_REGULARVS 0x8000
#define SH_TEXTUREVS 0x8001
#define SH_REGULARFOGVS 0x8002
#define SH_TEXTUREFOGVS 0x8003
#define SH_REGULARPS 0x8004
#define SH_REGULARFOGPS 0x8005
#define SH_BITBLTVS 0x8006
#define SH_BITBLTPS 0x8007
#define SH_BITBLTDEPTHPS 0x8009
#define SH_CRTCTARGPS 0x800a
#define SH_CRTCPS 0x800b
#define SH_CRTC24PS 0x800c
#define SH_ZEROPS 0x800e
#define SH_BASETEXTUREPS 0x800f
#define SH_BITBLTAAPS 0x8010
#define SH_CRTCTARGINTERPS 0x8012
#define SH_CRTCINTERPS 0x8013
#define SH_CRTC24INTERPS 0x8014
#define SH_BITBLTDEPTHMRTPS 0x8016
#define SH_CONVERT16TO32PS 0x8020
#define SH_CONVERT32TO16PS 0x8021
#define SH_CRTC_NEARESTPS 0x8022
#define SH_CRTCINTER_NEARESTPS 0x8023
using namespace ZeroGS;
//------------------ Constants
@ -35,24 +68,40 @@ namespace ZeroGS
{
FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne;
FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
VERTEXSHADER pvsBitBlt;
}
// Debug variable, store name of the function that call the shader.
const char* ShaderCallerName = "";
const char* ShaderHandleName = "";
extern u32 ptexBlocks; // holds information on block tiling
extern u32 ptexConv16to32;
// OpenGL texture numbers of helper textures; a value of 0 means that no such
// texture exists. These textures should be created on start and released on finish.
extern u32 ptexBlocks; // holds information on block tiling
extern u32 ptexConv16to32;
extern u32 ptexConv32to16;
bool g_bCRTCBilinear = true;
u8* s_lpShaderResources = NULL;
map<int, SHADERHEADER*> mapShaderResources;
CGcontext g_cgcontext;
ZZshContext g_cgcontext;
ZZshProfile cgvProf, cgfProf;
int g_nPixelShaderVer = 0; // default
//------------------ Code
// Reports whether both Cg profiles used by this file are available:
// CG_PROFILE_ARBVP1 (vertex) and CG_PROFILE_ARBFP1 (fragment).
// The vertex profile is queried first; the first missing profile is logged
// and false is returned without querying the other one.
bool ZZshCheckProfilesSupport() {
	const bool hasVertexProfile = (cgGLIsProfileSupported(CG_PROFILE_ARBVP1) == CG_TRUE);
	if (!hasVertexProfile) {
		ZZLog::Error_Log("arbvp1 not supported.");
		return false;
	}

	const bool hasFragmentProfile = (cgGLIsProfileSupported(CG_PROFILE_ARBFP1) == CG_TRUE);
	if (!hasFragmentProfile) {
		ZZLog::Error_Log("arbfp1 not supported.");
		return false;
	}

	return true;
}
// Error handler. Setup in ZZogl_Create once.
void HandleCgError(CGcontext ctx, CGerror err, void* appdata)
void HandleCgError(ZZshContext ctx, ZZshError err, void* appdata)
{
ZZLog::Error_Log("%s->%s: %s", ShaderCallerName, ShaderHandleName, cgGetErrorString(err));
const char* listing = cgGetLastListing(g_cgcontext);
@ -60,14 +109,106 @@ void HandleCgError(CGcontext ctx, CGerror err, void* appdata)
if (listing != NULL) ZZLog::Debug_Log(" Last listing: %s", listing);
}
// Brings up the Cg side of the plugin:
//   1. installs HandleCgError as the Cg error handler and creates g_cgcontext;
//   2. selects and enables the ARB vertex/fragment profiles (cgvProf/cgfProf);
//   3. creates the shared parameters g_fparamFogColor and g_vparamPosXY[0..1];
//   4. loads the effects, then probes with a sample shader which shader
//      quality level (g_nPixelShaderVer) is usable;
//   5. loads the extra effects.
// Returns true on success. NOTE(review): B_G is a project macro not visible
// here; presumably it runs its second argument (return false) when the first
// evaluates to false -- confirm against its definition.
bool ZZshStartUsingShaders() {
	cgSetErrorHandler(HandleCgError, NULL);
	g_cgcontext = cgCreateContext();

	cgvProf = CG_PROFILE_ARBVP1;
	cgfProf = CG_PROFILE_ARBFP1;
	cgGLEnableProfile(cgvProf);
	cgGLEnableProfile(cgfProf);
	cgGLSetOptimalOptions(cgvProf);
	cgGLSetOptimalOptions(cgfProf);

	cgGLSetManageTextureParameters(g_cgcontext, CG_FALSE);
	//cgSetAutoCompile(g_cgcontext, CG_COMPILE_IMMEDIATE);

	g_fparamFogColor = cgCreateParameter(g_cgcontext, CG_FLOAT4);
	g_vparamPosXY[0] = cgCreateParameter(g_cgcontext, CG_FLOAT4);
	g_vparamPosXY[1] = cgCreateParameter(g_cgcontext, CG_FLOAT4);

	ZZLog::Debug_Log("Creating effects.");
	B_G(LoadEffects(), return false);

	// create a sample shader
	clampInfo temp;
	memset(&temp, 0, sizeof(temp));
	temp.wms = 3; temp.wmt = 3;

	g_nPixelShaderVer = 0;//SHADER_ACCURATE;

	// test
	// Start from a defined value: the flag is read right after the call, and
	// the out-parameter contract of LoadShadeEffect is not visible from here.
	bool bFailed = false;
	FRAGMENTSHADER* pfrag = LoadShadeEffect(0, 1, 1, 1, 1, temp, 0, &bFailed);
	if( bFailed || pfrag == NULL ) {
		// First probe failed: retry with the accurate+reduced variant
		// (texfilter and exactcolor arguments set to 0 this time).
		g_nPixelShaderVer = SHADER_ACCURATE|SHADER_REDUCED;

		pfrag = LoadShadeEffect(0, 0, 1, 1, 0, temp, 0, &bFailed);
		if( pfrag != NULL )
			cgGLLoadProgram(pfrag->prog);
		if( bFailed || pfrag == NULL || cgGetError() != CG_NO_ERROR ) {
			// Second probe failed as well: fall back to plain reduced shaders.
			g_nPixelShaderVer = SHADER_REDUCED;
			ZZLog::Error_Log("Basic shader test failed.");
		}
	}

	// Any reduced shader level switches the bilinear setting off.
	if (g_nPixelShaderVer & SHADER_REDUCED)
		conf.bilinear = 0;

	ZZLog::Debug_Log("Creating extra effects.");
	B_G(LoadExtraEffects(), return false);

	ZZLog::Debug_Log("using %s shaders.", g_pShaders[g_nPixelShaderVer]);
	return true;
}
// Disable CG: turns off both Cg profiles stored in cgvProf and cgfProf
// (the profiles assigned in ZZshStartUsingShaders).
void ZZshGLDisableProfile() {
cgGLDisableProfile(cgvProf);
cgGLDisableProfile(cgfProf);
}
// Enable CG: turns on both Cg profiles stored in cgvProf and cgfProf
// (the profiles assigned in ZZshStartUsingShaders).
void ZZshGLEnableProfile() {
cgGLEnableProfile(cgvProf);
cgGLEnableProfile(cgfProf);
}
// This is a wrapper around cgGLSetParameter4fv, made for debugging purposes.
// The name can be any string. It has to be supplied by the caller at compile time, because the error handler
// does not report it.
void ZZcgSetParameter4fv(CGparameter param, const float* v, const char* name)
void ZZshSetParameter4fv(ZZshParameter param, const float* v, const char* name)
{
ShaderHandleName = name;
cgGLSetParameter4fv(param, v);
}
// Texture counterpart of ZZshSetParameter4fv.
// Records `name` in ShaderHandleName (the string the Cg error handler prints),
// then assigns the GL texture object `texobj` to the parameter `param` and
// enables that texture parameter.
void ZZshGLSetTextureParameter(ZZshParameter param, GLuint texobj, const char* name) {
ShaderHandleName = name;
cgGLSetTextureParameter(param, texobj);
cgGLEnableTextureParameter(param);
}
// Used sometimes for color 1.
// Sets the sOneColor parameter of the given shader to (1, 1, 1, 1).
// Note that `ptr` is taken by value, so the whole FRAGMENTSHADER is copied.
void ZZshDefaultOneColor( FRAGMENTSHADER ptr ) {
// This name is replaced by "DefaultOne" as soon as ZZshSetParameter4fv runs,
// because that function stores its own `name` argument in ShaderHandleName.
ShaderHandleName = "Set Default One color";
Vector v = Vector ( 1, 1, 1, 1 );
ZZshSetParameter4fv( ptr.sOneColor, v, "DefaultOne");
}
// Binds `prog` through cgGLBindProgram and remembers it in g_vsprog.
// Nothing happens when `prog` already equals the remembered g_vsprog.
void ZZshSetVertexShader(ZZshShader prog) {
	if (prog == g_vsprog)
		return;	// same program as last time: skip the Cg bind call

	cgGLBindProgram(prog);
	g_vsprog = prog;
}
// Binds `prog` through cgGLBindProgram and remembers it in g_psprog.
// Nothing happens when `prog` already equals the remembered g_psprog.
void ZZshSetPixelShader(ZZshShader prog) {
	if (prog == g_psprog)
		return;	// same program as last time: skip the Cg bind call

	cgGLBindProgram(prog);
	g_psprog = prog;
}
void SetupFragmentProgramParameters(FRAGMENTSHADER* pf, int context, int type)
{
@ -126,9 +267,9 @@ void SetupFragmentProgramParameters(FRAGMENTSHADER* pf, int context, int type)
static bool outdated_shaders = false;
void SetupVertexProgramParameters(CGprogram prog, int context)
void SetupVertexProgramParameters(ZZshProgram prog, int context)
{
CGparameter p;
ZZshParameter p;
p = cgGetNamedParameter(prog, "g_fPosXY");

View File

@ -0,0 +1,243 @@
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef __ZEROGS_SHADERS_H__
#define __ZEROGS_SHADERS_H__
// -- Not very important things, but we keep it to enumerate shader
#define NUM_FILTERS 2 // texture filtering
#define NUM_TYPES 5 // types of texture read modes
#define NUM_TEXWRAPS 4 // texture wrapping
#define NUM_SHADERS (NUM_FILTERS*NUM_TYPES*NUM_TEXWRAPS*32) // # shaders for a given ps
// Just bitmask for different type of shaders
#define SHADER_REDUCED 1 // equivalent to ps2.0
#define SHADER_ACCURATE 2 // for older cards with less accurate math (ps2.x+)
// For output
const static char* g_pShaders[] = { "full", "reduced", "accurate", "accurate-reduced" };
#define NVIDIA_CG_API
// --------------------------- API abstraction level --------------------------------
#ifdef NVIDIA_CG_API // Code for NVIDIA cg-toolkit API
#include <Cg/cg.h>
#include <Cg/cgGL.h>
// Backend-neutral type names. With the Cg backend, the three program-like
// names (ZZshProgram, ZZshShader, ZZshShaderLink) all map to CGprogram.
#define ZZshProgram CGprogram
#define ZZshShader CGprogram
#define ZZshShaderLink CGprogram
#define ZZshParameter CGparameter
#define ZZshContext CGcontext
#define ZZshProfile CGprofile
#define ZZshError CGerror
// "Empty" values used to initialize parameter and program handles.
#define pZero 0 // Zero parameter
#define sZero 0 // Zero program
// Destroys the Cg program x when it is not NULL and resets x to NULL.
// The argument is expanded more than once, so pass a plain variable.
#define SAFE_RELEASE_PROG(x) { if( (x) != NULL ) { cgDestroyProgram(x); x = NULL; } }
// True when the parameter handle is not NULL.
inline bool ZZshActiveParameter(ZZshParameter param) {return (param !=NULL); }
#endif // end NVIDIA cg-toolkit API
const static char* g_pPsTexWrap[] = { "-DREPEAT", "-DCLAMP", "-DREGION_REPEAT", NULL };
const static char* g_pTexTypes[] = { "32", "tex32", "clut32", "tex32to16", "tex16to8h" };
enum ZZshShaderType {ZZ_SH_ZERO, ZZ_SH_REGULAR, ZZ_SH_REGULAR_FOG, ZZ_SH_TEXTURE, ZZ_SH_TEXTURE_FOG, ZZ_SH_CRTC};
// We have "compatible" shaders, as RegularFogVS and RegularFogPS, if we don't need to worry about incompatible shaders.
// It's used only in GLSL mode.
// ------------------------- Variables -------------------------------
extern int g_nPixelShaderVer;
extern ZZshShaderLink pvs[16], g_vsprog, g_psprog;
extern ZZshParameter g_vparamPosXY[2], g_fparamFogColor;
#define MAX_ACTIVE_UNIFORMS 600
#define MAX_ACTIVE_SHADERS 400
struct FRAGMENTSHADER
{
FRAGMENTSHADER() : prog(sZero), Shader(0), sMemory(pZero), sFinal(pZero), sBitwiseANDX(pZero), sBitwiseANDY(pZero), sInterlace(pZero), sCLUT(pZero), sOneColor(pZero), sBitBltZ(pZero),
fTexAlpha2(pZero), fTexOffset(pZero), fTexDims(pZero), fTexBlock(pZero), fClampExts(pZero), fTexWrapMode(pZero),
fRealTexDims(pZero), fTestBlack(pZero), fPageOffset(pZero), fTexAlpha(pZero) {}
ZZshShaderLink prog; // it links to the FRAGMENTSHADER structure, for compatibility between GLSL and CG.
ZZshShader Shader; // GLSL store shaders not as ready programs, but as shader compiled objects. VS and PS should be linked together to
// make a program.
ZZshShaderType ShaderType; // Not every PS and VS are used together, only compatible ones.
ZZshParameter sMemory, sFinal, sBitwiseANDX, sBitwiseANDY, sInterlace, sCLUT;
ZZshParameter sOneColor, sBitBltZ, sInvTexDims;
ZZshParameter fTexAlpha2, fTexOffset, fTexDims, fTexBlock, fClampExts, fTexWrapMode, fRealTexDims, fTestBlack, fPageOffset, fTexAlpha;
int ParametersStart, ParametersFinish; // this is part of UniformsIndex array in which parameters of this shader asre stored. The last one is ParametersFinish-1
#ifdef _DEBUG
string filename;
#endif
void set_uniform_param(ZZshParameter &var, const char *name)
{
ZZshParameter p;
p = cgGetNamedParameter(prog, name);
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) var = p;
}
bool set_texture(GLuint texobj, const char *name)
{
ZZshParameter p;
p = cgGetNamedParameter(prog, name);
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE)
{
cgGLSetTextureParameter(p, texobj);
cgGLEnableTextureParameter(p);
return true;
}
return false;
}
bool connect(ZZshParameter &tex, const char *name)
{
ZZshParameter p;
p = cgGetNamedParameter(prog, name);
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE)
{
cgConnectParameter(tex, p);
return true;
}
return false;
}
bool set_texture(ZZshParameter &tex, const char *name)
{
ZZshParameter p;
p = cgGetNamedParameter(prog, name);
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE)
{
//cgGLEnableTextureParameter(p);
tex = p;
return true;
}
return false;
}
bool set_shader_const(Vector v, const char *name)
{
ZZshParameter p;
p = cgGetNamedParameter(prog, name);
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE)
{
cgGLSetParameter4fv(p, v);
return true;
}
return false;
}
};
struct VERTEXSHADER
{
VERTEXSHADER() : prog(sZero), Shader(0), sBitBltPos(pZero), sBitBltTex(pZero) {}
ZZshShaderLink prog;
ZZshShader Shader;
ZZshShaderType ShaderType;
ZZshParameter sBitBltPos, sBitBltTex, fBitBltTrans; // vertex shader constants
int ParametersStart, ParametersFinish;
};
// Shader-related globals and functions of the ZeroGS namespace that other
// source files of the plugin use.
namespace ZeroGS {
// Shaders variables
extern Vector g_vdepth;
extern Vector vlogz;
extern VERTEXSHADER pvsBitBlt;
extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne; // ppsOne used to stop using shaders for draw
extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
bool LoadEffects();
bool LoadExtraEffects();
// NOTE(review): pbFailed looks like an output flag reporting a load failure;
// the definition is not visible here -- confirm before relying on it.
FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
// only sets a limited amount of state (for Update)
void SetTexClamping(int context, FRAGMENTSHADER* pfragment);
// CheckVB: when true, the function returns early if vb[context].pmemtarg
// already is the memory target it looks up for tex0.
void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force);
}
// ------------------------- Variables -------------------------------
extern u8* s_lpShaderResources;
extern ZZshProfile cgvProf, cgfProf;
extern FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS];
extern FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2];
// ------------------------- Functions -------------------------------
#ifdef NVIDIA_CG_API
// Each overload tells whether a Cg program handle is present (not NULL).
// The pointer overloads dereference `pf` without checking it first, so the
// caller has to pass a valid pointer.
inline bool ZZshExistProgram(FRAGMENTSHADER* pf)
{
	return pf->prog != NULL;
}

inline bool ZZshExistProgram(VERTEXSHADER* pf)
{
	return pf->prog != NULL;
}

inline bool ZZshExistProgram(ZZshShaderLink prog)
{
	return prog != NULL;
}
#endif
extern const char* ShaderCallerName;
extern const char* ShaderHandleName;
// Remembers the name of the function that is about to use a shader.
// Only the pointer is stored (no copy), so `Name` must stay valid for as
// long as ShaderCallerName may be read.
inline void SetShaderCaller(const char* Name) {
ShaderCallerName = Name;
}
// Remembers the name of the shader parameter/handle that is about to be set.
// Only the pointer is stored (no copy).
inline void SetHandleName(const char* Name) {
ShaderHandleName = Name;
}
// Currently does nothing: its only statement is commented out.
inline void ResetShaderCounters() {
// g_vsprog = g_psprog = sZero;
}
extern bool ZZshCheckProfilesSupport();
extern bool ZZshStartUsingShaders();
extern void ZZshGLDisableProfile();
extern void ZZshGLEnableProfile();
extern void ZZshSetParameter4fv(ZZshParameter param, const float* v, const char* name);
extern void ZZshGLSetTextureParameter(ZZshParameter param, GLuint texobj, const char* name);
extern void ZZshDefaultOneColor( FRAGMENTSHADER ptr );
extern void ZZshSetVertexShader(ZZshShader prog);
extern void ZZshSetPixelShader(ZZshShader prog);
// Flattens a shader configuration into a single array index.
// `type` is the fastest-varying component, followed by `texfilter` and
// `texwrap`; the remaining arguments are combined with power-of-two weights
// (fog*1, writedepth*2, testaem*4, exactcolor*8, context*16, ps*32) and form
// the slowest-varying component.
inline int GET_SHADER_INDEX(int type, int texfilter, int texwrap, int fog, int writedepth, int testaem, int exactcolor, int context, int ps)
{
	const int weightedFlags = fog + 2*writedepth + 4*testaem + 8*exactcolor + 16*context + 32*ps;

	return type
		+ texfilter*NUM_TYPES
		+ NUM_FILTERS*NUM_TYPES*texwrap
		+ NUM_TEXWRAPS*NUM_FILTERS*NUM_TYPES*weightedFlags;
}
// Header record describing one shader entry by index, offset and size.
// NOTE(review): presumably offset/size locate the shader inside the loaded
// shader resource data -- confirm against the loader.
struct SHADERHEADER
{
unsigned int index, offset, size; // if highest bit of index is set, pixel shader
};
#endif

View File

@ -40,7 +40,7 @@ static __forceinline int GET_SHADER_INDEX(int type, int texfilter, int texwrap,
return type + texfilter*NUM_TYPES + NUM_FILTERS*NUM_TYPES*texwrap + NUM_TEXWRAPS*NUM_FILTERS*NUM_TYPES*(fog+2*writedepth+4*testaem+8*exactcolor+16*context+32*ps);
}
extern CGcontext g_cgcontext;
extern ZZshContext g_cgcontext;
static __forceinline CGprogram LoadShaderFromType(const char* srcdir, const char* srcfile, int type, int texfilter, int texwrap, int fog, int writedepth, int testaem, int exactcolor, int ps, int context)
{
@ -63,7 +63,7 @@ static __forceinline CGprogram LoadShaderFromType(const char* srcdir, const char
if( ps & SHADER_ACCURATE ) macros.push_back("-DACCURATE_DECOMPRESSION");
macros.push_back(NULL);
CGprogram prog = cgCreateProgramFromFile(g_cgcontext, CG_SOURCE, srcfile, CG_PROFILE_ARBFP1, str, &macros[0]);
ZZshProgram prog = cgCreateProgramFromFile(g_cgcontext, CG_SOURCE, srcfile, CG_PROFILE_ARBFP1, str, &macros[0]);
if( !cgIsProgram(prog) ) {
printf("Failed to load shader %s: \n%s\n", str, cgGetLastListing(g_cgcontext));
return NULL;

View File

@ -18,8 +18,6 @@
*/
#include "GS.h"
#include <Cg/cg.h>
#include <Cg/cgGL.h>
#include <stdlib.h>
@ -27,6 +25,7 @@
#include "x86.h"
#include "zerogs.h"
#include "targets.h"
#include "ZZoglShaders.h"
#define RHA
//#define RW
@ -128,7 +127,7 @@ inline Vector ZeroGS::CRenderTarget::DefaultBitBltPos()
{
Vector v = Vector(1, -1, 0.5f / (float)RW(fbw), 0.5f / (float)RH(fbh));
v *= 1.0f / 32767.0f;
ZZcgSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_sBitBltPos");
ZZshSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_sBitBltPos");
return v;
}
@ -139,7 +138,7 @@ inline Vector ZeroGS::CRenderTarget::DefaultBitBltTex()
// I am really sure that -0.5 is correct, because OpenGL does not have the
// half-offset issue that DirectX is known for.
Vector v = Vector(1, -1, 0.5f / (float)RW(fbw), -0.5f / (float)RH(fbh));
ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_sBitBltTex");
ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_sBitBltTex");
return v;
}
@ -237,11 +236,11 @@ void ZeroGS::CRenderTarget::SetTarget(int fbplocal, const Rect2& scissor, int co
v.y = vposxy.y;
v.z = vposxy.z;
v.w = vposxy.w - dy * 2.0f / (float)fbh;
ZZcgSetParameter4fv(g_vparamPosXY[context], v, "g_fPosXY");
ZZshSetParameter4fv(g_vparamPosXY[context], v, "g_fPosXY");
}
else
{
ZZcgSetParameter4fv(g_vparamPosXY[context], vposxy, "g_fPosXY");
ZZshSetParameter4fv(g_vparamPosXY[context], vposxy, "g_fPosXY");
}
// set render states
@ -434,8 +433,7 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth)
if (nUpdateTarg)
{
cgGLSetTextureParameter(ppsBaseTexture.sFinal, ittarg->second->ptex);
cgGLEnableTextureParameter(ppsBaseTexture.sFinal);
ZZshGLSetTextureParameter(ppsBaseTexture.sFinal, ittarg->second->ptex, "BaseTexture.final");
//assert( ittarg->second->fbw == fbw );
int offset = (fbp - ittarg->second->fbp) * 64 / fbw;
@ -448,17 +446,19 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth)
v.z = 0.25f;
v.w = (float)RH(offset) + 0.25f;
ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
// v = DefaultBitBltTex(); Maybe?
v = DefaultOneColor(ppsBaseTexture) ;
ZZshDefaultOneColor ( ppsBaseTexture );
SETPIXELSHADER(ppsBaseTexture.prog);
ZZshSetPixelShader(ppsBaseTexture.prog);
nUpdateTarg = 0;
}
else
{
u32 bit_idx = (AA.x == 0) ? 0 : 1;
// align the rect to the nearest page
// note that fbp is always aligned on page boundaries
tex0Info texframe;
@ -467,21 +467,20 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth)
texframe.tw = fbw;
texframe.th = fbh;
texframe.psm = psm;
CMemoryTarget* pmemtarg = g_MemTargs.GetMemoryTarget(texframe, 1);
// write color and zero out stencil buf, always 0 context!
// force bilinear if using AA
// Fix in r133 -- FFX movies and Gust backgrounds!
SetTexVariablesInt(0, 0*(s_AAx || s_AAy) ? 2 : 0, texframe, pmemtarg, &ppsBitBlt[!!s_AAx], 1);
cgGLSetTextureParameter(ppsBitBlt[!!s_AAx].sMemory, pmemtarg->ptex->tex);
cgGLEnableTextureParameter(ppsBitBlt[!!s_AAx].sMemory);
//SetTexVariablesInt(0, 0*(AA.x || AA.y) ? 2 : 0, texframe, false, &ppsBitBlt[!!s_AAx], 1);
SetTexVariablesInt(0, 0, texframe, false, &ppsBitBlt[bit_idx], 1);
ZZshGLSetTextureParameter(ppsBitBlt[bit_idx].sMemory, vb[0].pmemtarg->ptex->tex, "BitBlt.memory");
v = Vector(1, 1, 0.0f, 0.0f);
ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
v.x = 1;
v.y = 2;
ZZcgSetParameter4fv(ppsBitBlt[!!s_AAx].sOneColor, v, "g_fOneColor");
ZZshSetParameter4fv(ppsBitBlt[bit_idx].sOneColor, v, "g_fOneColor");
assert(ptex != 0);
@ -496,11 +495,11 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth)
}
// render with an AA shader if possible (bilinearly interpolates data)
//cgGLLoadProgram(ppsBitBlt[!!s_AAx].prog);
SETPIXELSHADER(ppsBitBlt[!!s_AAx].prog);
//cgGLLoadProgram(ppsBitBlt[bit_idx].prog);
ZZshSetPixelShader(ppsBitBlt[bit_idx].prog);
}
SETVERTEXSHADER(pvsBitBlt.prog);
ZZshSetVertexShader(pvsBitBlt.prog);
DrawTriangleArray();
@ -545,22 +544,22 @@ void ZeroGS::CRenderTarget::ConvertTo32()
v.y = (float)RH(16);
v.z = -(float)RW(fbw);
v.w = (float)RH(8);
ZZcgSetParameter4fv(ppsConvert16to32.fTexOffset, v, "g_fTexOffset");
ZZshSetParameter4fv(ppsConvert16to32.fTexOffset, v, "g_fTexOffset");
v.x = (float)RW(8);
v.y = 0;
v.z = 0;
v.w = 0.25f;
ZZcgSetParameter4fv(ppsConvert16to32.fPageOffset, v, "g_fPageOffset");
ZZshSetParameter4fv(ppsConvert16to32.fPageOffset, v, "g_fPageOffset");
v.x = (float)RW(2 * fbw);
v.y = (float)RH(fbh);
v.z = 0;
v.w = 0.0001f * (float)RH(fbh);
ZZcgSetParameter4fv(ppsConvert16to32.fTexDims, v, "g_fTexDims");
ZZshSetParameter4fv(ppsConvert16to32.fTexDims, v, "g_fTexDims");
// v.x = 0;
// ZZcgSetParameter4fv(ppsConvert16to32.fTexBlock, v, "g_fTexBlock");
// ZZshSetParameter4fv(ppsConvert16to32.fTexBlock, v, "g_fTexBlock");
glBindBuffer(GL_ARRAY_BUFFER, vboRect);
SET_STREAM();
@ -569,10 +568,8 @@ void ZeroGS::CRenderTarget::ConvertTo32()
FBTexture(0, ptexConv);
ZeroGS::ResetRenderTarget(1);
BindToSample(&ptex) ;
cgGLSetTextureParameter(ppsConvert16to32.sFinal, ptex);
cgGLEnableTextureParameter(ppsBitBlt[!!s_AAx].sMemory);
BindToSample(&ptex);
ZZshGLSetTextureParameter(ppsConvert16to32.sFinal, ptex, "Convert 16 to 32.Final");
fbh /= 2; // have 16 bit surfaces are usually 2x higher
SetViewport();
@ -580,9 +577,8 @@ void ZeroGS::CRenderTarget::ConvertTo32()
if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
// render with an AA shader if possible (bilinearly interpolates data)
SETVERTEXSHADER(pvsBitBlt.prog);
SETPIXELSHADER(ppsConvert16to32.prog);
ZZshSetVertexShader(pvsBitBlt.prog);
ZZshSetPixelShader(ppsConvert16to32.prog);
DrawTriangleArray();
#ifdef _DEBUG
@ -600,7 +596,6 @@ void ZeroGS::CRenderTarget::ConvertTo32()
// restore
SAFE_RELEASE_TEX(ptex);
SAFE_RELEASE_TEX(ptexFeedback);
ptex = ptexConv;
@ -609,7 +604,7 @@ void ZeroGS::CRenderTarget::ConvertTo32()
if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
// reset textures
BindToSample(&ptex) ;
BindToSample(&ptex);
glEnable(GL_SCISSOR_TEST);
@ -653,19 +648,19 @@ void ZeroGS::CRenderTarget::ConvertTo16()
v.y = 8.0f / (float)fbh;
v.z = 0.5f * v.x;
v.w = 0.5f * v.y;
ZZcgSetParameter4fv(ppsConvert32to16.fTexOffset, v, "g_fTexOffset");
ZZshSetParameter4fv(ppsConvert32to16.fTexOffset, v, "g_fTexOffset");
v.x = 256.0f / 255.0f;
v.y = 256.0f / 255.0f;
v.z = 0.05f / 256.0f;
v.w = -0.001f / 256.0f;
ZZcgSetParameter4fv(ppsConvert32to16.fPageOffset, v, "g_fPageOffset");
ZZshSetParameter4fv(ppsConvert32to16.fPageOffset, v, "g_fPageOffset");
v.x = (float)RW(fbw);
v.y = (float)RH(2 * fbh);
v.z = 0;
v.w = -0.1f / RH(fbh);
ZZcgSetParameter4fv(ppsConvert32to16.fTexDims, v, "g_fTexDims");
ZZshSetParameter4fv(ppsConvert32to16.fTexDims, v, "g_fTexDims");
glBindBuffer(GL_ARRAY_BUFFER, vboRect);
SET_STREAM();
@ -675,10 +670,9 @@ void ZeroGS::CRenderTarget::ConvertTo16()
ZeroGS::ResetRenderTarget(1);
GL_REPORT_ERRORD();
BindToSample(&ptex) ;
BindToSample(&ptex);
cgGLSetTextureParameter(ppsConvert32to16.sFinal, ptex);
cgGLEnableTextureParameter(ppsConvert32to16.sFinal);
ZZshGLSetTextureParameter(ppsConvert32to16.sFinal, ptex, "Convert 32 to 16");
// fbh *= 2; // have 16 bit surfaces are usually 2x higher
@ -687,9 +681,8 @@ void ZeroGS::CRenderTarget::ConvertTo16()
if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
// render with an AA shader if possible (bilinearly interpolates data)
SETVERTEXSHADER(pvsBitBlt.prog);
SETPIXELSHADER(ppsConvert32to16.prog);
ZZshSetVertexShader(pvsBitBlt.prog);
ZZshSetPixelShader(ppsConvert32to16.prog);
DrawTriangleArray();
#ifdef _DEBUG
@ -702,7 +695,6 @@ void ZeroGS::CRenderTarget::ConvertTo16()
#endif
vposxy.y = -2.0f * (32767.0f / 8.0f) / (float)fbh;
vposxy.w = 1 + 0.5f / fbh;
// restore
@ -759,11 +751,11 @@ void ZeroGS::CRenderTarget::_CreateFeedback()
// tex coords, test ffx bikanel island when changing these
/* Vector v = DefaultBitBltPos();
v = Vector ((float)(RW(fbw+4)), (float)(RH(fbh+4)), +0.25f, -0.25f);
ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, v, "BitBltTex");*/
ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "BitBltTex");*/
// tex coords, test ffx bikanel island when changing these
// Vector v = Vector(1, -1, 0.5f / (fbw<<s_AAx), 0.5f / (fbh << s_AAy));
// Vector v = Vector(1, -1, 0.5f / (fbw << AA.x), 0.5f / (fbh << AA.y));
// v *= 1/32767.0f;
// cgGLSetParameter4fv(pvsBitBlt.sBitBltPos, v);
Vector v = DefaultBitBltPos();
@ -772,8 +764,8 @@ void ZeroGS::CRenderTarget::_CreateFeedback()
v.y = (float)(RH(fbh));
v.z = 0.0f;
v.w = 0.0f;
cgGLSetParameter4fv(pvsBitBlt.sBitBltTex, v);
v = DefaultOneColor(ppsBaseTexture);
ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "BitBlt.Feedback");
ZZshDefaultOneColor(ppsBaseTexture);
glBindBuffer(GL_ARRAY_BUFFER, vboRect);
SET_STREAM();
@ -782,16 +774,15 @@ void ZeroGS::CRenderTarget::_CreateFeedback()
glBindTexture(GL_TEXTURE_RECTANGLE_NV, ptex);
GL_REPORT_ERRORD();
cgGLSetTextureParameter(ppsBaseTexture.sFinal, ptex);
cgGLEnableTextureParameter(ppsBaseTexture.sFinal);
ZZshGLSetTextureParameter(ppsBaseTexture.sFinal, ptex, "BaseTexture.Feedback");
SetViewport();
if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
// render with an AA shader if possible (bilinearly interpolates data)
SETVERTEXSHADER(pvsBitBlt.prog);
SETPIXELSHADER(ppsBaseTexture.prog);
ZZshSetVertexShader(pvsBitBlt.prog);
ZZshSetPixelShader(ppsBaseTexture.prog);
DrawTriangleArray();
// restore
@ -967,7 +958,6 @@ void ZeroGS::CDepthTarget::Update(int context, ZeroGS::CRenderTarget* prndr)
texframe.tw = fbw;
texframe.th = fbh;
texframe.psm = psm;
CMemoryTarget* pmemtarg = g_MemTargs.GetMemoryTarget(texframe, 1);
DisableAllgl();
@ -986,11 +976,9 @@ void ZeroGS::CDepthTarget::Update(int context, ZeroGS::CRenderTarget* prndr)
glDepthFunc(g_dwZCmp[curvb.test.ztst]);
// write color and zero out stencil buf, always 0 context!
SetTexVariablesInt(0, 0, texframe, pmemtarg, &ppsBitBltDepth, 1);
cgGLSetTextureParameter(ppsBitBltDepth.sMemory, pmemtarg->ptex->tex);
cgGLEnableTextureParameter(ppsBaseTexture.sFinal);
SetTexVariablesInt(0, 0, texframe, false, &ppsBitBltDepth, 1);
ZZshGLSetTextureParameter(ppsBitBltDepth.sMemory, vb[0].pmemtarg->ptex->tex, "BitBltDepth");
Vector v = DefaultBitBltPos();
v = DefaultBitBltTex();
@ -999,7 +987,7 @@ void ZeroGS::CDepthTarget::Update(int context, ZeroGS::CRenderTarget* prndr)
v.y = 2;
v.z = PSMT_IS16Z(psm) ? 1.0f : 0.0f;
v.w = g_filog32;
ZZcgSetParameter4fv(ppsBitBltDepth.sOneColor, v, "g_fOneColor");
ZZshSetParameter4fv(ppsBitBltDepth.sOneColor, v, "g_fOneColor");
Vector vdepth = g_vdepth;
@ -1014,7 +1002,7 @@ void ZeroGS::CDepthTarget::Update(int context, ZeroGS::CRenderTarget* prndr)
assert(ppsBitBltDepth.sBitBltZ != 0);
ZZcgSetParameter4fv(ppsBitBltDepth.sBitBltZ, ((255.0f / 256.0f)*vdepth), "g_fBitBltZ");
ZZshSetParameter4fv(ppsBitBltDepth.sBitBltZ, ((255.0f / 256.0f)*vdepth), "g_fBitBltZ");
assert(pdepth != 0);
//GLint w1 = 0;
@ -1039,8 +1027,8 @@ void ZeroGS::CDepthTarget::Update(int context, ZeroGS::CRenderTarget* prndr)
glBindBuffer(GL_ARRAY_BUFFER, vboRect);
SET_STREAM();
SETVERTEXSHADER(pvsBitBlt.prog);
SETPIXELSHADER(ppsBitBltDepth.prog);
ZZshSetVertexShader(pvsBitBlt.prog);
ZZshSetPixelShader(ppsBitBltDepth.prog);
DrawTriangleArray();
@ -1892,7 +1880,7 @@ static __forceinline void BuildClut(u32 psm, u32 height, T* pclut, u8* psrc, T*
#define TARGET_THRESH 0x500
extern int g_MaxTexWidth, g_MaxTexHeight;
extern int g_MaxTexWidth, g_MaxTexHeight; // Maximum height & width of supported texture.
//#define SORT_TARGETS
inline list<CMemoryTarget>::iterator ZeroGS::CMemoryTargetMngr::DestroyTargetIter(list<CMemoryTarget>::iterator& it)
@ -2057,29 +2045,6 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::MemoryTarget_SearchExistTarget
return NULL;
}
// Maps a pixel storage mode to its channel count:
//   CLUT modes     -> 4 for PSMT8, 8 for PSMT4, 1 for any other CLUT mode
//   non-CLUT modes -> 2 when PSMT_IS16BIT(psm) holds, 1 otherwise
static __forceinline int NumberOfChannels(int psm)
{
    if (PSMT_ISCLUT(psm))
    {
        if (psm == PSMT8) return 4;
        if (psm == PSMT4) return 8;
        return 1;
    }

    // 16z needs to be a8r8g8b8
    return (PSMT_IS16BIT(psm)) ? 2 : 1;
}
ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::MemoryTarget_ClearedTargetsSearch(int fmt, int widthmult, int channels, int height)
{
CMemoryTarget* targ = NULL;
@ -2093,9 +2058,7 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::MemoryTarget_ClearedTargetsSea
if ((height <= itbest->realheight) && (itbest->fmt == fmt) && (itbest->widthmult == widthmult) && (itbest->channels == channels))
{
// check channels
int targchannels = NumberOfChannels(itbest->psm);
if (targchannels == channels) break;
if (PIXELS_PER_WORD(itbest->psm) == channels) break;
}
++itbest;
@ -2140,12 +2103,14 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
u32 fmt = GL_UNSIGNED_BYTE;
// RGBA16 storage format
if (PSMT_ISHALF_STORAGE(tex0)) fmt = GL_UNSIGNED_SHORT_1_5_5_5_REV;
int widthmult = 1, channels = 1;
// If the texture is too tall to fit in a single GPU texture, use a width multiplier of 2. Pretty rare.
if ((g_MaxTexHeight < 4096) && (end - start > g_MaxTexHeight)) widthmult = 2;
channels = NumberOfChannels(tex0.psm);
channels = PIXELS_PER_WORD(tex0.psm);
targ = MemoryTarget_ClearedTargetsSearch(fmt, widthmult, channels, end - start);
@ -3122,11 +3087,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo
case PSMCT24:
if (s_AAy)
if (AA.y)
{
RESOLVE_32BIT(32, u32, u32, 32A4, 8, 8, (u32), Frame, s_AAx, s_AAy);
RESOLVE_32BIT(32, u32, u32, 32A4, 8, 8, (u32), Frame, AA.x, AA.y);
}
else if (s_AAx)
else if (AA.x)
{
RESOLVE_32BIT(32, u32, u32, 32A2, 8, 8, (u32), Frame, 1, 0);
}
@ -3139,11 +3104,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo
case PSMCT16:
if (s_AAy)
if (AA.y)
{
RESOLVE_32BIT(16, u16, u32, 16A4, 16, 8, RGBA32to16, Frame, s_AAx, s_AAy);
RESOLVE_32BIT(16, u16, u32, 16A4, 16, 8, RGBA32to16, Frame, AA.x, AA.y);
}
else if (s_AAx)
else if (AA.x)
{
RESOLVE_32BIT(16, u16, u32, 16A2, 16, 8, RGBA32to16, Frame, 1, 0);
}
@ -3156,11 +3121,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo
case PSMCT16S:
if (s_AAy)
if (AA.y)
{
RESOLVE_32BIT(16S, u16, u32, 16A4, 16, 8, RGBA32to16, Frame, s_AAx, s_AAy);
RESOLVE_32BIT(16S, u16, u32, 16A4, 16, 8, RGBA32to16, Frame, AA.x, AA.y);
}
else if (s_AAx)
else if (AA.x)
{
RESOLVE_32BIT(16S, u16, u32, 16A2, 16, 8, RGBA32to16, Frame, 1, 0);
}
@ -3175,11 +3140,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo
case PSMT24Z:
if (s_AAy)
if (AA.y)
{
RESOLVE_32BIT(32Z, u32, u32, 32A4, 8, 8, (u32), Frame, s_AAx, s_AAy);
RESOLVE_32BIT(32Z, u32, u32, 32A4, 8, 8, (u32), Frame, AA.x, AA.y);
}
else if (s_AAx)
else if (AA.x)
{
RESOLVE_32BIT(32Z, u32, u32, 32A2, 8, 8, (u32), Frame, 1, 0);
}
@ -3192,11 +3157,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo
case PSMT16Z:
if (s_AAy)
if (AA.y)
{
RESOLVE_32BIT(16Z, u16, u32, 16A4, 16, 8, (u16), Frame, s_AAx, s_AAy);
RESOLVE_32BIT(16Z, u16, u32, 16A4, 16, 8, (u16), Frame, AA.x, AA.y);
}
else if (s_AAx)
else if (AA.x)
{
RESOLVE_32BIT(16Z, u16, u32, 16A2, 16, 8, (u16), Frame, 1, 0);
}
@ -3209,11 +3174,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo
case PSMT16SZ:
if (s_AAy)
if (AA.y)
{
RESOLVE_32BIT(16SZ, u16, u32, 16A4, 16, 8, (u16), Frame, s_AAx, s_AAy);
RESOLVE_32BIT(16SZ, u16, u32, 16A4, 16, 8, (u16), Frame, AA.x, AA.y);
}
else if (s_AAx)
else if (AA.x)
{
RESOLVE_32BIT(16SZ, u16, u32, 16A2, 16, 8, (u16), Frame, 1, 0);
}
@ -3234,11 +3199,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo
case PSMCT24:
if (s_AAy)
if (AA.y)
{
RESOLVE_32BIT(32, u32, Vector_16F, 32A4, 8, 8, Float16ToARGB, Frame16, 1, 1);
}
else if (s_AAx)
else if (AA.x)
{
RESOLVE_32BIT(32, u32, Vector_16F, 32A2, 8, 8, Float16ToARGB, Frame16, 1, 0);
}
@ -3251,11 +3216,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo
case PSMCT16:
if (s_AAy)
if (AA.y)
{
RESOLVE_32BIT(16, u16, Vector_16F, 16A4, 16, 8, Float16ToARGB16, Frame16, 1, 1);
}
else if (s_AAx)
else if (AA.x)
{
RESOLVE_32BIT(16, u16, Vector_16F, 16A2, 16, 8, Float16ToARGB16, Frame16, 1, 0);
}
@ -3268,11 +3233,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo
case PSMCT16S:
if (s_AAy)
if (AA.y)
{
RESOLVE_32BIT(16S, u16, Vector_16F, 16A4, 16, 8, Float16ToARGB16, Frame16, 1, 1);
}
else if (s_AAx)
else if (AA.x)
{
RESOLVE_32BIT(16S, u16, Vector_16F, 16A2, 16, 8, Float16ToARGB16, Frame16, 1, 0);
}
@ -3287,11 +3252,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo
case PSMT24Z:
if (s_AAy)
if (AA.y)
{
RESOLVE_32BIT(32Z, u32, Vector_16F, 32ZA4, 8, 8, Float16ToARGB_Z, Frame16, 1, 1);
}
else if (s_AAx)
else if (AA.x)
{
RESOLVE_32BIT(32Z, u32, Vector_16F, 32ZA2, 8, 8, Float16ToARGB_Z, Frame16, 1, 0);
}
@ -3304,11 +3269,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo
case PSMT16Z:
if (s_AAy)
if (AA.y)
{
RESOLVE_32BIT(16Z, u16, Vector_16F, 16ZA4, 16, 8, Float16ToARGB16_Z, Frame16, 1, 1);
}
else if (s_AAx)
else if (AA.x)
{
RESOLVE_32BIT(16Z, u16, Vector_16F, 16ZA2, 16, 8, Float16ToARGB16_Z, Frame16, 1, 0);
}
@ -3321,11 +3286,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo
case PSMT16SZ:
if (s_AAy)
if (AA.y)
{
RESOLVE_32BIT(16SZ, u16, Vector_16F, 16ZA4, 16, 8, Float16ToARGB16_Z, Frame16, 1, 1);
}
else if (s_AAx)
else if (AA.x)
{
RESOLVE_32BIT(16SZ, u16, Vector_16F, 16ZA2, 16, 8, Float16ToARGB16_Z, Frame16, 1, 0);
}

View File

@ -27,13 +27,6 @@
#define GL_TEXTURE_RECTANGLE GL_TEXTURE_RECTANGLE_NV
#endif
// Uploads the vector (1, 1, 1, 1) to the fragment shader's sOneColor parameter
// and hands the same vector back to the caller.
inline Vector DefaultOneColor(FRAGMENTSHADER ptr)
{
    Vector ones(1, 1, 1, 1);
    cgGLSetParameter4fv(ptr.sOneColor, ones);
    return ones;
}
namespace ZeroGS
{
@ -206,24 +199,19 @@ extern CRenderTargetMngr s_RTs, s_DepthRTs;
extern CBitwiseTextureMngr s_BitwiseTextures;
extern CMemoryTargetMngr g_MemTargs;
extern u8 s_AAx, s_AAy, s_AAz, s_AAw;
//extern u8 s_AAx, s_AAy;
extern Point AA;
// Real rendered width, depends on AA and AAneg.
// Real rendered width, depends on AA.
inline int RW(int tbw)
{
// Net shift is s_AAx - s_AAz: shift left when it is non-negative, right otherwise.
if (s_AAx >= s_AAz)
return (tbw << (s_AAx - s_AAz));
else
return (tbw >> (s_AAz - s_AAx));
// NOTE(review): both branches above return, so this statement is never reached as
// written. It looks like a second implementation of the same function (AA.x only);
// confirm which one is intended and drop the other.
return (tbw << AA.x);
}
// Real rendered height, depends on AA and AAneg.
// Real rendered height, depends on AA.
inline int RH(int tbh)
{
// Net shift is s_AAy - s_AAw: shift left when it is non-negative, right otherwise.
if (s_AAy >= s_AAw)
return (tbh << (s_AAy - s_AAw));
else
return (tbh >> (s_AAw - s_AAy));
// NOTE(review): both branches above return, so this statement is never reached as
// written. It looks like a second implementation of the same function (AA.y only);
// confirm which one is intended and drop the other.
return (tbh << AA.y);
}
/* inline void CreateTargetsList(int start, int end, list<ZeroGS::CRenderTarget*>& listTargs) {
@ -242,10 +230,6 @@ inline list<ZeroGS::CRenderTarget*> CreateTargetsList(int start, int end)
extern Vector g_vdepth;
extern int icurctx;
extern VERTEXSHADER pvsBitBlt;
extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne;
extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
extern GLuint vboRect;
// Unworking

View File

@ -32,6 +32,7 @@
#include "zpipe.h"
#include "targets.h"
#include "GLWin.h"
#include "ZZoglShaders.h"
//----------------------- Defines
@ -50,7 +51,7 @@ extern int g_nFrame, g_nRealFrame;
//-------------------------- Variables
primInfo *prim;
CGprogram g_vsprog = 0, g_psprog = 0; // 2 -- ZZ
ZZshProgram g_vsprog = 0, g_psprog = 0; // 2 -- ZZ
inline u32 FtoDW(float f) { return (*((u32*)&f)); }
@ -81,7 +82,7 @@ PFNGLDRAWBUFFERSPROC glDrawBuffers = NULL;
/////////////////////
// graphics resources
CGparameter g_vparamPosXY[2] = {0}, g_fparamFogColor = 0;
ZZshParameter g_vparamPosXY[2] = {0}, g_fparamFogColor = 0;
bool s_bTexFlush = false;
int s_nLastResolveReset = 0;
@ -104,10 +105,6 @@ GLenum GetRenderTargetFormat() { return GetRenderFormat() == RFT_byte8 ? 4 : g_i
// returns the first and last addresses aligned to a page that cover
void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw);
// bool LoadEffects();
// bool LoadExtraEffects();
// FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
int s_nNewWidth = -1, s_nNewHeight = -1;
void ChangeDeviceSize(int nNewWidth, int nNewHeight);
@ -343,8 +340,7 @@ extern RasterFont* font_p;
void ZeroGS::DrawText(const char* pstr, int left, int top, u32 color)
{
FUNCLOG
cgGLDisableProfile(cgvProf);
cgGLDisableProfile(cgfProf);
ZZshGLDisableProfile();
Vector v;
v.SetColor(color);
@ -352,8 +348,7 @@ void ZeroGS::DrawText(const char* pstr, int left, int top, u32 color)
//glColor3f(((color >> 16) & 0xff) / 255.0f, ((color >> 8) & 0xff)/ 255.0f, (color & 0xff) / 255.0f);
font_p->printString(pstr, left * 2.0f / (float)nBackbufferWidth - 1, 1 - top * 2.0f / (float)nBackbufferHeight, 0);
cgGLEnableProfile(cgvProf);
cgGLEnableProfile(cgfProf);
ZZshGLEnableProfile();
}
void ZeroGS::ChangeWindowSize(int nNewWidth, int nNewHeight)
@ -409,42 +404,10 @@ void ZeroGS::ChangeDeviceSize(int nNewWidth, int nNewHeight)
assert(vb[0].pBufferData != NULL && vb[1].pBufferData != NULL);
}
// Selects the "negative" anti-aliasing mode and resets every piece of state that
// depends on it.
//   mode <= 0 : s_AAz = s_AAw = 0
//   mode >  0 : s_AAz = (mode + 1) / 2, s_AAw = mode / 2   -- e.g. (1, 0), (1, 1)
// Both values are used as binary shift amounts.
void ZeroGS::SetNegAA(int mode)
{
    FUNCLOG

    // need to flush all targets
    s_RTs.ResolveAll();
    s_RTs.Destroy();
    s_DepthRTs.ResolveAll();
    s_DepthRTs.Destroy();

    if (mode > 0)
    {
        s_AAz = (mode + 1) / 2;
        s_AAw = mode / 2;
    }
    else
    {
        s_AAz = 0;
        s_AAw = 0;
    }

    // Start resolve statistics from scratch.
    memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts));
    s_nLastResolveReset = 0;

    // Drop the cached targets of both contexts and force a frame / Z re-check.
    for (int ctx = 0; ctx < 2; ++ctx)
    {
        vb[ctx].prndr = NULL;
        vb[ctx].pdepth = NULL;
        vb[ctx].bNeedFrameCheck = 1;
        vb[ctx].bNeedZCheck = 1;
    }
}
void ZeroGS::SetAA(int mode)
{
FUNCLOG
float f;
float f = 1.0f;
// need to flush all targets
s_RTs.ResolveAll();
@ -452,28 +415,28 @@ void ZeroGS::SetAA(int mode)
s_DepthRTs.ResolveAll();
s_DepthRTs.Destroy();
s_AAx = s_AAy = 0; // This is code for x0, x2, x4, x8 and x16 anti-aliasing.
AA.x = AA.y = 0; // This is code for x0, x2, x4, x8 and x16 anti-aliasing.
if (mode > 0)
{
s_AAx = (mode + 1) / 2; // ( 1, 0 ) ; ( 1, 1 ) ; ( 2, 1 ) ; ( 2, 2 ) -- it's used as binary shift, so x >> s_AAx, y >> s_AAy
s_AAy = mode / 2;
// ( 1, 0 ) ; ( 1, 1 ) ; ( 2, 1 ) ; ( 2, 2 )
// it's used as a binary shift, so x >> AA.x, y >> AA.y
AA.x = (mode + 1) / 2;
AA.y = mode / 2;
f = 2.0f;
}
memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts));
s_nLastResolveReset = 0;
vb[0].prndr = NULL;
vb[0].pdepth = NULL;
vb[0].bNeedFrameCheck = 1;
vb[0].bNeedZCheck = 1;
vb[1].prndr = NULL;
vb[1].pdepth = NULL;
vb[1].bNeedFrameCheck = 1;
vb[1].bNeedZCheck = 1;
vb[0].bNeedFrameCheck = vb[0].bNeedZCheck = 1;
vb[1].bNeedFrameCheck = vb[1].bNeedZCheck = 1;
f = mode > 0 ? 2.0f : 1.0f;
glPointSize(f);
}
@ -486,14 +449,6 @@ void ZeroGS::Prim()
if (curvb.CheckPrim()) Flush(prim->ctxt);
curvb.curprim._val = prim->_val;
// flush the other pipe if sharing the same buffer
// if( vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp && vb[!prim->ctxt].nCount > 0 )
// {
// assert( vb[prim->ctxt].nCount == 0 );
// Flush(!prim->ctxt);
// }
curvb.curprim.prim = prim->prim;
}
@ -537,25 +492,24 @@ void ZeroGS::RenderCustom(float fAlpha)
// tex coords
Vector v = Vector(1 / 32767.0f, 1 / 32767.0f, 0, 0);
ZZcgSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_fBitBltPos");
ZZshSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_fBitBltPos");
v.x = (float)nLogoWidth;
v.y = (float)nLogoHeight;
ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
v.x = v.y = v.z = v.w = fAlpha;
ZZcgSetParameter4fv(ppsBaseTexture.sOneColor, v, "g_fOneColor");
ZZshSetParameter4fv(ppsBaseTexture.sOneColor, v, "g_fOneColor");
if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
// inside vhDCb[0]'s target area, so render that region only
cgGLSetTextureParameter(ppsBaseTexture.sFinal, ptexLogo);
cgGLEnableTextureParameter(ppsBaseTexture.sFinal);
ZZshGLSetTextureParameter(ppsBaseTexture.sFinal, ptexLogo, "Logo");
glBindBuffer(GL_ARRAY_BUFFER, vboRect);
SET_STREAM();
SETVERTEXSHADER(pvsBitBlt.prog);
SETPIXELSHADER(ppsBaseTexture.prog);
ZZshSetVertexShader(pvsBitBlt.prog);
ZZshSetPixelShader(ppsBaseTexture.prog);
DrawTriangleArray();
// restore
@ -657,7 +611,7 @@ void ZeroGS::KickPoint()
curvb.NotifyWrite(1);
int last = (gs.primIndex + 2) % ARRAY_SIZE(gs.gsvertex);
int last = gs.primNext(2);
VertexGPU* p = curvb.pBufferData + curvb.nCount;
SET_VERTEX(&p[0], last, curvb);
@ -682,8 +636,8 @@ void ZeroGS::KickLine()
curvb.NotifyWrite(2);
int next = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex);
int last = (gs.primIndex + 2) % ARRAY_SIZE(gs.gsvertex);
int next = gs.primNext();
int last = gs.primNext(2);
VertexGPU* p = curvb.pBufferData + curvb.nCount;
SET_VERTEX(&p[0], next, curvb);
@ -748,7 +702,7 @@ void ZeroGS::KickTriangleFan()
// add 1 to skip the first vertex
if (gs.primIndex == gs.nTriFanVert) gs.primIndex = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex);
if (gs.primIndex == gs.nTriFanVert) gs.primIndex = gs.primNext();
OUTPUT_VERT(p[0], 0);
OUTPUT_VERT(p[1], 1);
@ -777,13 +731,12 @@ void ZeroGS::KickSprite()
}
curvb.NotifyWrite(6);
int next = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex);
int last = (gs.primIndex + 2) % ARRAY_SIZE(gs.gsvertex);
int next = gs.primNext();
int last = gs.primNext(2);
// sprite is too small and AA shows lines (tek4, Mana Khemia)
gs.gsvertex[last].x += (4*s_AAx);
gs.gsvertex[last].y += (4*s_AAy);
gs.gsvertex[last].x += (4 * AA.x);
gs.gsvertex[last].y += (4 * AA.y);
// might be bad sprite (KH dialog text)
//if( gs.gsvertex[next].x == gs.gsvertex[last].x || gs.gsvertex[next].y == gs.gsvertex[last].y )
@ -832,11 +785,8 @@ void ZeroGS::SetFogColor(u32 fog)
Vector v;
// set it immediately
// v.x = (gs.fogcol & 0xff) / 255.0f;
// v.y = ((gs.fogcol >> 8) & 0xff) / 255.0f;
// v.z = ((gs.fogcol >> 16) & 0xff) / 255.0f;
v.SetColor(gs.fogcol);
ZZcgSetParameter4fv(g_fparamFogColor, v, "g_fParamFogColor");
ZZshSetParameter4fv(g_fparamFogColor, v, "g_fParamFogColor");
// }
}
@ -851,7 +801,7 @@ void ZeroGS::SetFogColor(GIFRegFOGCOL* fog)
v.x = fog->FCR / 255.0f;
v.y = fog->FCG / 255.0f;
v.z = fog->FCB / 255.0f;
ZZcgSetParameter4fv(g_fparamFogColor, v, "g_fParamFogColor");
ZZshSetParameter4fv(g_fparamFogColor, v, "g_fParamFogColor");
}
void ZeroGS::ExtWrite()

View File

@ -34,12 +34,9 @@
#include "GS.h"
#include "CRC.h"
#include "rasterfont.h" // simple font
#include "ZeroGSShaders/zerogsshaders.h"
using namespace std;
//------------------------ Constants ----------------------
#define VB_BUFFERSIZE 0x400
@ -48,7 +45,6 @@ const float g_filog32 = 0.999f / (32.0f * logf(2.0f));
//------------------------ Inlines -------------------------
// Calculate maximum height for target
inline int get_maxheight(int fbp, int fbw, int psm)
{
@ -62,29 +58,13 @@ inline int get_maxheight(int fbp, int fbw, int psm)
return ret;
}
// Returns 1 when psm is PSMCT24, PSMCT16 or PSMCT16S (the modes that need an
// alpha test with alpha expansion), 0 for every other mode.
inline int nNeedAlpha(u8 psm)
{
    const bool needsAlpha = (psm == PSMCT24) || (psm == PSMCT16) || (psm == PSMCT16S);
    return needsAlpha ? 1 : 0;
}
// Color storage mode that matters at the flush stage: tex.cpsm when
// PSMT_ISCLUT(tex.psm) holds, otherwise tex.psm itself.
inline u8 GetTexCPSM(const tex0Info& tex)
{
    return (PSMT_ISCLUT(tex.psm)) ? tex.cpsm : tex.psm;
}
// ------------------------ Variables -------------------------
// all textures have this width
//#define GPU_TEXWIDTH 512
extern int GPU_TEXWIDTH;
extern float g_fiGPU_TEXWIDTH;
#define MASKDIVISOR 0
#define GPU_TEXMASKWIDTH (1024 >> MASKDIVISOR) // bitwise mask width for region repeat mode
#define MASKDIVISOR 0 // Used for decrement bitwise mask texture size if 1024 is too big
#define GPU_TEXMASKWIDTH (1024 >> MASKDIVISOR) // bitwise mask width for region repeat mode
extern u32 ptexBilinearBlocks;
@ -423,15 +403,6 @@ union
};
// Tells whether the alpha test (with alpha expansion) should be used for curvb.
// All three conditions must hold:
//   - tex0.tcc is non-zero (tcc == 0 means no alpha is used),
//   - gs.texa.aem is non-zero (aem is used for alpha expansion; the original
//     author was not sure this check is correct),
//   - nNeedAlpha() accepts the texture's color storage mode.
inline bool IsAlphaTestExpansion(VB& curvb)
{
    if (!curvb.tex0.tcc) return false;
    if (!gs.texa.aem) return false;

    return nNeedAlpha(GetTexCPSM(curvb.tex0)) != 0;
}
// visible members
extern DrawFn drawfn[8];
@ -441,17 +412,6 @@ extern float fiTexWidth[2], fiTexHeight[2]; // current tex width and height
extern vector<GLuint> g_vboBuffers; // VBOs for all drawing commands
extern GLuint vboRect;
extern int g_nCurVBOIndex;
// Shaders variables
extern Vector g_vdepth;
extern Vector vlogz;
extern VERTEXSHADER pvsBitBlt;
extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne;
extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
bool LoadEffects();
bool LoadExtraEffects();
FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
extern RenderFormatType g_RenderFormatType;
void AddMessage(const char* pstr, u32 ms = 5000);
@ -460,7 +420,6 @@ void ChangeWindowSize(int nNewWidth, int nNewHeight);
void SetChangeDeviceSize(int nNewWidth, int nNewHeight);
void ChangeDeviceSize(int nNewWidth, int nNewHeight);
void SetAA(int mode);
void SetNegAA(int mode);
void SetCRC(int crc);
void ReloadEffects();
@ -545,10 +504,6 @@ void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h
void SetContextTarget(int context) ;
void NeedFactor(int w);
// only sets a limited amount of state (for Update)
void SetTexClamping(int context, FRAGMENTSHADER* pfragment);
void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, ZeroGS::CMemoryTarget* pmemtarg, FRAGMENTSHADER* pfragment, int force);
void ResetAlphaVariables();
void StartCapture();