fpPS4/chip/ps4_videodrv.pas
2023-03-05 13:44:11 +03:00

2783 lines
78 KiB
ObjectPascal
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

unit ps4_videodrv;
{$mode objfpc}{$H+}
{/$define ww}
{/$define null_rt}
interface
uses
Windows,
Classes,
SysUtils,
LFQueue,
bittype,
sys_crt,
sys_types,
sys_kernel,
ps4_libSceVideoOut,
ps4_pssl,
ps4_shader,
pm4defs,
//ps4_Tiling,
Vulkan,
vDevice,
vMemory,
vShader,
vPipeline,
vImage,
vRender,
vCmdBuffer,
vShaderExt,
vShaderManager,
vPipelineLayoutManager,
vSetsPoolManager,
vHostBufferManager,
vImageManager,
vSampler,
vSamplerManager,
vRenderPassManager,
si_ci_vi_merged_offset,
si_ci_vi_merged_enum,
si_ci_vi_merged_registers
;
type
// Description of one command-buffer submission passed to vSubmitCommandBuffers.
// The four pointers are parallel arrays with `count` entries each; either the
// dcb pair or the ccb pair may be nil (both cases are checked by the callers
// in this unit).
// NOTE(review): "dcb"/"ccb" presumably stand for draw/constant command buffer - confirm.
PvSubmitInfo=^TvSubmitInfo;
TvSubmitInfo=packed record
count:DWORD; // number of entries in each array below
dcbGpuAddrs:PPointer; // array of dcb start addresses (an entry may be nil)
dcbSizesInBytes:PDWORD; // array of dcb sizes, one per address
ccbGpuAddrs:PPointer; // array of ccb start addresses (an entry may be nil)
ccbSizesInBytes:PDWORD; // array of ccb sizes, one per address
end;
const
// Largest size in bytes accepted for a single dcb/ccb entry;
// calc_submit_size rejects bigger entries with SCE_KERNEL_ERROR_EINVAL.
kIndirectBufferMaximumSizeInBytes=$3FFFFC;
// Validates and deep-copies the submission (plus the optional flip request),
// pushes it on the GFX ring queue and wakes the GFX thread.
// Returns 0 on success or an SCE_KERNEL_ERROR_* code.
function vSubmitCommandBuffers(
Submit:PvSubmitInfo;
Flip:PqcFlipInfo):Integer;
// Waits for the idle event of the GFX thread; submissions are reported as
// not allowed (see vSubmitsAllowed) for the duration of the wait.
procedure vSubmitDone;
// True while no vSubmitDone wait is in progress.
function vSubmitsAllowed:Boolean;
var
// NOTE(review): not referenced in the visible part of this unit; presumably
// debugging switches consumed by the draw/dispatch code - confirm.
DEPTH_DISABLE_HACK:Boolean=false;
COMPUTE_DISABLE_HACK:Boolean=false;
implementation
Uses
ntapi,
atomic,
spinlock,
ps4_libSceGnmDriver,
ps4_gpu_regs,
shader_dump;
type
// Queue element of the GFX ring. vSubmitCommandBuffers allocates it with the
// copied address/size arrays placed directly behind the record, so a single
// FreeMem (see TvCmdRing.Next) releases everything.
PvSubmitNode=^TvSubmitNode;
TvSubmitNode=record
// link field for TIntrusiveMPSCQueue
// NOTE(review): presumably has to stay the first field - confirm against LFQueue.
next_:PvSubmitNode;
//
Submit:TvSubmitInfo; // private copy of the caller's submit info
Flip:TqcFlipInfo; // flip request; left zeroed (AllocMem) when the caller passed nil
end;
// Consumer-side state of the graphics ring: the queue of pending submissions
// and the Vulkan command buffer currently being recorded for them.
TvCmdRing=object
Queue:TIntrusiveMPSCQueue; // pending TvSubmitNode items
Current:PvSubmitNode; // submission being parsed; freed by the next call to Next
CmdBuffer:TvCmdBuffer; // buffer under construction; nil until AllocCmdBuffer creates it
submit_id:ptruint; // incremented each time Next releases a finished submission
SetShCount:ptruint; // NOTE(review): not touched in the visible part of the unit
SetCxCount:ptruint; // NOTE(review): not touched in the visible part of the unit
LastSetReg:WORD; // last register passed to SetContextReg; reset to 0 by Next, read by onNop
// Initializes the queue.
Procedure Init;
// Frees the previous submission and pops the next one; True when one was popped.
Function Next:Boolean;
// Lazily creates the shared command pool and CmdBuffer.
Procedure AllocCmdBuffer;
end;
// Payload of a metFlip micro-engine node; consumed by me_flip.
pvMeFlipInfo=^TvMeFlipInfo;
TvMeFlipInfo=record
qcInfo:TqcFlipInfo; // flip request; submitted only when qcInfo.hVideo<>0
FlipLData:DWORD; // value stored to FlipLabel^
FlipLabel:PDWORD; // optional label address; nil = no label write
Interrupt:Boolean; // when True, post_event_eop is called after the flip
end;
// Payload of a metEop micro-engine node; consumed by me_eop.
pvMeEopInfo=^TvMeEopInfo;
TvMeEopInfo=record
adr:Pointer; // destination of the end-of-pipe write
data:QWORD; // immediate value (low 32 bits used for the 32-bit variant)
dataSel:Byte; // selects what is written, see the Case in me_eop
Interrupt:Boolean; // when True, post_event_eop is called after the write
end;
// Payload of a metWaitMem micro-engine node; evaluated by me_test_mem as
// (PDWORD(adr)^ and mask) <cFunc> ref.
PvMeWaitMemInfo=^TvMeWaitMemInfo;
TvMeWaitMemInfo=record
adr:Pointer; // address of the polled DWORD
ref:DWORD; // reference value
mask:DWORD; // mask applied to the polled value before comparing
cFunc:Byte; // one of the WAIT_REG_MEM_FUNC_* constants
end;
// Kind of work carried by a micro-engine node.
TvMicroEngineType=(metCmdBuffer,metFlip,metEop,metWaitMem);
// Queue element of the micro engine. `mode` tells which member of the
// variant part is valid; the Push* methods of TvMicroEngine fill it.
PvMicroEngineNode=^TvMicroEngineNode;
TvMicroEngineNode=record
// link field for TIntrusiveMPSCQueue
// NOTE(review): presumably has to stay the first field - confirm against LFQueue.
next_:PvMicroEngineNode;
//
mode:TvMicroEngineType;
Case Byte of
0:(CmdBuffer:TvCmdBuffer); // metCmdBuffer
1:(FlipInfo:TvMeFlipInfo); // metFlip
2:(EopInfo:TvMeEopInfo); // metEop
3:(WaitMem:TvMeWaitMemInfo); // metWaitMem
end;
// Ordered queue of deferred GPU-side actions (command buffer submits, flips,
// end-of-pipe writes, memory waits) drained by GFX_thread.
TvMicroEngine=object
Queue:TIntrusiveMPSCQueue; // pending TvMicroEngineNode items
Current:PvMicroEngineNode; // node in flight; freed by the next call to Next
// Initializes the queue.
Procedure Init;
// Frees the previous node and pops the next one; True when one was popped.
Function Next:Boolean;
// Queues Cmd and sets the caller's variable to nil; no-op when Cmd is nil.
Procedure PushCmd(var Cmd:TvCmdBuffer);
// Queues a flip and resets the caller's qcInfo to its default value.
Procedure PushFlip(var qcInfo:TqcFlipInfo;FlipLData:DWORD;FlipLabel:PDWORD;Interrupt:Boolean);
// Queues an end-of-pipe write.
Procedure PushEop(adr:Pointer;data:QWORD;dataSel:Byte;Interrupt:Boolean);
// Queues a wait on a memory location.
Procedure PushWaitMem(adr:Pointer;ref,mask:DWORD;cFunc:Byte);
end;
var
_gfx_lazy_init:Integer=0; // exchanged to 1 by the first Init_gfx caller
_gfx_handle:Thandle=0; // handle of GFX_thread; 0 until Init_gfx has started it
GFXRing:TvCmdRing; // incoming submissions
GFXMicroEngine:TvMicroEngine; // deferred actions produced while parsing
FIdleEvent:THandle=0; // event set by GFX_thread when it has no work, reset on every submit
FSubmitAllowed:QWORD=0; // 0 = submits allowed; 1 while vSubmitDone is waiting
GPU_REGS:TGPU_REGS; // shadow copy of the GPU register state
FCmdPool:TvCmdPool; // command pool shared by all command buffers, created on first use
Procedure TvCmdRing.Init;
begin
 //set up the queue that receives submissions
 Queue.Create;
end;
//Releases the submission handled so far (if any) and fetches the next one.
//Returns the result of Queue.Pop.
Function TvCmdRing.Next:Boolean;
begin
 if Assigned(Current) then
 begin
  //the previous submission is finished: drop it and advance the counter
  FreeMem(Current);
  Current:=nil;
  Inc(submit_id);
 end;

 //the "last register" tracking starts over with every fetch
 LastSetReg:=0;

 Result:=Queue.Pop(Current);
end;
//Makes sure CmdBuffer exists, creating the shared command pool on first use.
Procedure TvCmdRing.AllocCmdBuffer;
begin
 InitVulkan;

 if not Assigned(FCmdPool) then
  FCmdPool:=TvCmdPool.Create(VulkanApp.FGFamily);

 //an already existing buffer is kept as is
 if Assigned(CmdBuffer) then Exit;

 CmdBuffer:=TvCmdBuffer.Create(FCmdPool,RenderQueue);
 //tag the new buffer with the id of the submission it belongs to
 CmdBuffer.submit_id:=submit_id;
end;
Procedure TvMicroEngine.Init;
begin
 //set up the queue of deferred actions
 Queue.Create;
end;
//Releases the node handled so far (if any) and fetches the next one.
//Returns the result of Queue.Pop.
Function TvMicroEngine.Next:Boolean;
begin
 if Assigned(Current) then
 begin
  FreeMem(Current);
  Current:=nil;
 end;

 Result:=Queue.Pop(Current);
end;
//Queues a command buffer node. Does nothing when Cmd is nil or the node
//cannot be allocated; on success the caller's variable is set to nil.
Procedure TvMicroEngine.PushCmd(var Cmd:TvCmdBuffer);
var
 item:PvMicroEngineNode;
begin
 if not Assigned(Cmd) then Exit;

 item:=AllocMem(SizeOf(TvMicroEngineNode));

 if Assigned(item) then
 begin
  item^.mode     :=metCmdBuffer;
  item^.CmdBuffer:=Cmd;

  Queue.Push(item);

  //the buffer now belongs to the queue
  Cmd:=nil;
 end;
end;
//Queues a flip node. On success the caller's qcInfo is reset to its default
//value; when the node cannot be allocated nothing is changed.
Procedure TvMicroEngine.PushFlip(var qcInfo:TqcFlipInfo;FlipLData:DWORD;FlipLabel:PDWORD;Interrupt:Boolean);
var
 item:PvMicroEngineNode;
 info:pvMeFlipInfo;
begin
 item:=AllocMem(SizeOf(TvMicroEngineNode));

 if Assigned(item) then
 begin
  item^.mode:=metFlip;

  info:=@item^.FlipInfo;
  info^.qcInfo   :=qcInfo;
  info^.FlipLData:=FlipLData;
  info^.FlipLabel:=FlipLabel;
  info^.Interrupt:=Interrupt;

  Queue.Push(item);

  //the request has been handed over
  qcInfo:=Default(TqcFlipInfo);
 end;
end;
//Queues an end-of-pipe write node; silently does nothing when the node
//cannot be allocated.
Procedure TvMicroEngine.PushEop(adr:Pointer;data:QWORD;dataSel:Byte;Interrupt:Boolean);
var
 item:PvMicroEngineNode;
 info:pvMeEopInfo;
begin
 item:=AllocMem(SizeOf(TvMicroEngineNode));

 if Assigned(item) then
 begin
  item^.mode:=metEop;

  info:=@item^.EopInfo;
  info^.adr      :=adr;
  info^.data     :=data;
  info^.dataSel  :=dataSel;
  info^.Interrupt:=Interrupt;

  Queue.Push(item);
 end;
end;
//Queues a memory wait node; silently does nothing when the node cannot be
//allocated.
Procedure TvMicroEngine.PushWaitMem(adr:Pointer;ref,mask:DWORD;cFunc:Byte);
var
 item:PvMicroEngineNode;
 info:PvMeWaitMemInfo;
begin
 item:=AllocMem(SizeOf(TvMicroEngineNode));

 if Assigned(item) then
 begin
  item^.mode:=metWaitMem;

  info:=@item^.WaitMem;
  info^.adr  :=adr;
  info^.ref  :=ref;
  info^.mask :=mask;
  info^.cFunc:=cFunc;

  Queue.Push(item);
 end;
end;
// Implemented further down in this unit (outside the visible part).
// gfx_cp_parser is called by GFX_thread for every popped submission;
// gfx_test/gfx_submit are called by me_node_test/me_node_submit for
// metCmdBuffer nodes.
procedure gfx_cp_parser(node:PvSubmitInfo); forward;
function gfx_submit(CmdBuffer:TvCmdBuffer):Boolean; forward;
function gfx_test(CmdBuffer:TvCmdBuffer):Boolean; forward;
//Executes a queued flip: optional label write, optional flip submit,
//optional EOP event. Always returns True.
Function me_flip(node:pvMeFlipInfo):Boolean;
begin
 Result:=True;
 if not Assigned(node) then Exit;

 //label write
 if Assigned(node^.FlipLabel) then
  node^.FlipLabel^:=node^.FlipLData;

 //flip itself, only for a non-zero video handle
 if (node^.qcInfo.hVideo<>0) then
  _qc_sceVideoOutSubmitFlip(@node^.qcInfo);

 //end-of-pipe notification
 if node^.Interrupt then
  post_event_eop;
end;
const
 //emulated GPU core clock in Hz
 GpuCoreClockFrequency=800000000;

//Returns the performance counter rescaled to GpuCoreClockFrequency ticks
//per second, i.e. pc*GpuCoreClockFrequency/pf.
//
//The value is computed as whole seconds plus the sub-second remainder:
//  (pc div pf)*GF + ((pc mod pf)*GF) div pf
//Scaling the two 32-bit halves of pc separately drops the division remainder
//of the high half before it is shifted left by 32, which makes the result
//jump (and even go backwards) whenever pf does not divide GF evenly.
//Both products stay inside 64 bits: (pc mod pf)<pf, and pf is far below
//2^64/GF for any real counter frequency.
function GetGpuTickCount:QWORD;
var
 pc,pf:QWORD;
begin
 pc:=0;
 pf:=1;
 NtQueryPerformanceCounter(@pc,@pf);

 //never divide by zero, whatever the call reported
 if (pf=0) then pf:=1;

 Result:=(pc div pf)*QWORD(GpuCoreClockFrequency)+
         ((pc mod pf)*QWORD(GpuCoreClockFrequency)) div pf;
end;
//Executes a queued end-of-pipe event: writes the selected value to
//node^.adr and optionally posts the EOP event. Always returns True.
Function me_eop(node:pvMeEopInfo):Boolean;
begin
 Result:=True;
 if not Assigned(node) then Exit;

 Case node^.dataSel of
  EVENTWRITEEOP_DATA_SEL_DISCARD:
   begin
    //nop
   end;
  kEventWriteSource32BitsImmediate:
   begin
    //only the first DWORD of data is stored
    PDWORD(node^.adr)^:=PDWORD(@node^.data)^;
   end;
  kEventWriteSource64BitsImmediate:
   begin
    PQWORD(node^.adr)^:=node^.data;
   end;
  kEventWriteSourceGlobalClockCounter,
  kEventWriteSourceGpuCoreClockCounter:
   begin
    //both clock sources are served by the same emulated counter
    PQWORD(node^.adr)^:=GetGpuTickCount;
   end;
  else
   Assert(False);
 end;

 if node^.Interrupt then
  post_event_eop;
end;
//Evaluates a queued memory wait: reads the DWORD at node^.adr, applies
//node^.mask and compares the result with node^.ref using node^.cFunc.
//Returns True when the condition holds (the wait is over).
//
//A nil node and an unknown compare function yield True, matching the
//default of the sibling me_* functions (an unknown function additionally
//trips the assertion when assertions are enabled).
Function me_test_mem(node:PvMeWaitMemInfo):Boolean;
var
 val,ref:DWORD;
begin
 Result:=True;
 if (node=nil) then Exit;

 val:=PDWORD(node^.adr)^ and node^.mask;
 ref:=node^.ref;

 Case node^.cFunc of
  WAIT_REG_MEM_FUNC_ALWAYS       :Result:=True;
  WAIT_REG_MEM_FUNC_LESS         :Result:=(val<ref);
  WAIT_REG_MEM_FUNC_LESS_EQUAL   :Result:=(val<=ref);
  WAIT_REG_MEM_FUNC_EQUAL        :Result:=(val=ref);
  WAIT_REG_MEM_FUNC_NOT_EQUAL    :Result:=(val<>ref);
  //the two comparisons below used to be swapped
  WAIT_REG_MEM_FUNC_GREATER_EQUAL:Result:=(val>=ref);
  WAIT_REG_MEM_FUNC_GREATER      :Result:=(val>ref);
  else
   Assert(false);
 end;
end;
//Tells whether the node in flight has completed, so that the micro engine
//may advance to the next node. A nil node counts as completed.
Function me_node_test(node:PvMicroEngineNode):Boolean;
begin
 Result:=True;
 if not Assigned(node) then Exit;

 Case node^.mode of
  metCmdBuffer:Result:=gfx_test(node^.CmdBuffer);
  metWaitMem  :Result:=me_test_mem(@node^.WaitMem);
  metFlip,
  metEop:
   begin
    //nothing to wait for, Result stays True
   end;
  else
   Assert(false);
 end;
end;
//Starts execution of a freshly popped node and returns the result of the
//handler for its mode. A nil node and metWaitMem nodes return True without
//doing anything (waits are evaluated by me_node_test).
Function me_node_submit(node:PvMicroEngineNode):Boolean;
begin
 Result:=True;
 if not Assigned(node) then Exit;

 Case node^.mode of
  metCmdBuffer:Result:=gfx_submit(node^.CmdBuffer);
  metFlip     :Result:=me_flip(@node^.FlipInfo);
  metEop      :Result:=me_eop(@node^.EopInfo);
  metWaitMem  :
   begin
    //nothing to start
   end;
  else
   Assert(false);
 end;
end;
// Body of the GFX worker thread started by Init_gfx; never returns.
// Each round of the loop:
//  1. pops one submission from GFXRing and parses it (gfx_cp_parser),
//  2. checks whether the micro-engine node in flight has completed,
//  3. if so (or if there is none) pops and starts the next micro-engine node,
//  4. when nothing was done, signals FIdleEvent and sleeps until woken.
// The parameter p is not used.
function GFX_thread(p:pointer):ptrint;
var
time:Int64;
work_do:Boolean;
begin
Result:=0;
sys_crt_init;
SetThreadDebugName(GetCurrentThreadId, 'GFX Thread');
repeat
work_do:=False;
// Next also frees the submission parsed in the previous round
if GFXRing.Next then
begin
gfx_cp_parser(@GFXRing.Current^.Submit);
work_do:=True;
end;
if (GFXMicroEngine.Current<>nil) then
begin
if me_node_test(GFXMicroEngine.Current) then
begin
//if (GFXMicroEngine.Current^.mode=metCmdBuffer) then
//begin
// SetEvent(FIdleEvent);
//end;
end else
begin
// node in flight is not finished yet: short sleep, then start the round
// over (Continue also re-runs the ring pop above)
// NOTE(review): negative value = relative delay in 100ns units, i.e. 100us - confirm
time:=-1000;
NtDelayExecution(True,@time);
Continue;
end;
work_do:=True;
end;
// Next frees the completed node before popping its successor
if GFXMicroEngine.Next then
begin
me_node_submit(GFXMicroEngine.Current);
work_do:=True;
end;
if not work_do then
begin
// nothing queued and nothing in flight: report idle and sleep; the sleep
// is requested with the first argument True, presumably "alertable", so
// that the APC queued by vSubmitCommandBuffers ends it - confirm
SetEvent(FIdleEvent);
time:=Int64(NT_INFINITE);
NtDelayExecution(True,@time);
end;
until false;
end;
//One-time start of the GFX machinery. The first caller creates the queues,
//the idle event and the worker thread; every other caller goes through
//wait_until_equal on the thread handle instead.
//NOTE(review): wait_until_equal presumably blocks while _gfx_handle is
//still 0 - confirm against its definition.
procedure Init_gfx;
begin
 if (XCHG(_gfx_lazy_init,1)<>0) then
 begin
  //initialization is owned by another caller
  wait_until_equal(_gfx_handle,0);
  Exit;
 end;

 GFXRing.Init;
 GFXMicroEngine.Init;

 FIdleEvent:=CreateEvent(nil,True,True,nil);

 //publish the handle last
 _gfx_handle:=BeginThread(@GFX_thread);
end;
//Empty APC routine; vSubmitCommandBuffers queues it to the GFX thread.
procedure _apc_null(dwParam:PTRUINT); stdcall;
begin
 //intentionally empty
end;
//Sums the sizes of all usable dcb and ccb entries of a submission.
//An entry is counted when its array pair is present, its address is not nil
//and its size is not zero.
//Returns 0 on success, SCE_KERNEL_ERROR_EINVAL when node is nil or any
//counted entry is larger than kIndirectBufferMaximumSizeInBytes (the sums
//then hold whatever was accumulated up to that entry).
function calc_submit_size(node:PvSubmitInfo;var dcbSize,ccbSize:DWORD):Integer;
var
 i:DWORD;
 len:DWORD;
begin
 Result:=0;
 dcbSize:=0;
 ccbSize:=0;

 if (node=nil) then Exit(SCE_KERNEL_ERROR_EINVAL);

 i:=0;
 While (i<node^.count) do
 begin

  //dcb entry
  if (node^.dcbGpuAddrs<>nil) and (node^.dcbSizesInBytes<>nil) then
  if (node^.dcbGpuAddrs[i]<>nil) then
  begin
   len:=node^.dcbSizesInBytes[i];
   if (len<>0) then
   begin
    if (len>kIndirectBufferMaximumSizeInBytes) then Exit(SCE_KERNEL_ERROR_EINVAL);
    dcbSize:=dcbSize+len;
   end;
  end;

  //ccb entry
  if (node^.ccbGpuAddrs<>nil) and (node^.ccbSizesInBytes<>nil) then
  if (node^.ccbGpuAddrs[i]<>nil) then
  begin
   len:=node^.ccbSizesInBytes[i];
   if (len<>0) then
   begin
    if (len>kIndirectBufferMaximumSizeInBytes) then Exit(SCE_KERNEL_ERROR_EINVAL);
    ccbSize:=ccbSize+len;
   end;
  end;

  Inc(i);
 end;
end;
// Queues a submission for the GFX thread.
//  Submit - buffers to execute; must not be nil and must contain at least
//           one non-empty dcb or ccb entry
//  Flip   - optional flip request stored with the submission (nil = none)
// Returns 0 on success, SCE_KERNEL_ERROR_EINVAL for an invalid/empty
// submission, SCE_KERNEL_ERROR_ENOMEM when the node cannot be allocated.
// The address/size arrays are copied, so the caller's arrays are not
// referenced after the call returns.
function vSubmitCommandBuffers(
Submit:PvSubmitInfo;
Flip:PqcFlipInfo):Integer;
var
node:PvSubmitNode;
dcbSize:DWORD;
ccbSize:DWORD;
addr:Pointer;
size:DWORD;
begin
Result:=0;
if (Submit=nil) then Exit(SCE_KERNEL_ERROR_EINVAL);
dcbSize:=0;
ccbSize:=0;
Result:=calc_submit_size(Submit,dcbSize,ccbSize);
if (Result<>0) then Exit;
if (dcbSize=0) and (ccbSize=0) then
begin
Exit(SCE_KERNEL_ERROR_EINVAL);
end;
//calc size
// node header plus, for every array pair that is present,
// count pointers and count DWORDs
size:=SizeOf(TvSubmitNode);
if (Submit^.dcbGpuAddrs<>nil) and (Submit^.dcbSizesInBytes<>nil) then
begin
size:=size+Submit^.count*(SizeOf(Pointer)+SizeOf(DWORD));
end;
if (Submit^.ccbGpuAddrs<>nil) and (Submit^.ccbSizesInBytes<>nil) then
begin
size:=size+Submit^.count*(SizeOf(Pointer)+SizeOf(DWORD));
end;
//alloc
// AllocMem zero-fills, so array pointers of an absent pair stay nil
node:=AllocMem(size);
if (node=nil) then
begin
Exit(SCE_KERNEL_ERROR_ENOMEM);
end;
//distrib
// addr walks through the space behind the record, handing out the arrays
addr:=Pointer(node)+SizeOf(TvSubmitNode);
if (Submit^.dcbGpuAddrs<>nil) and (Submit^.dcbSizesInBytes<>nil) then
begin
node^.Submit.dcbGpuAddrs:=addr;
addr:=addr+Submit^.count*SizeOf(Pointer);
node^.Submit.dcbSizesInBytes:=addr;
addr:=addr+Submit^.count*SizeOf(DWORD);
//copy
Move(Submit^.dcbGpuAddrs^ ,node^.Submit.dcbGpuAddrs^ ,Submit^.count*SizeOf(Pointer));
Move(Submit^.dcbSizesInBytes^,node^.Submit.dcbSizesInBytes^,Submit^.count*SizeOf(DWORD));
end;
if (Submit^.ccbGpuAddrs<>nil) and (Submit^.ccbSizesInBytes<>nil) then
begin
node^.Submit.ccbGpuAddrs:=addr;
addr:=addr+Submit^.count*SizeOf(Pointer);
node^.Submit.ccbSizesInBytes:=addr;
addr:=addr+Submit^.count*SizeOf(DWORD);
//copy
Move(Submit^.ccbGpuAddrs^ ,node^.Submit.ccbGpuAddrs^ ,Submit^.count*SizeOf(Pointer));
Move(Submit^.ccbSizesInBytes^,node^.Submit.ccbSizesInBytes^,Submit^.count*SizeOf(DWORD));
end;
node^.Submit.count:=Submit^.count;
// starts the GFX thread on first use; FIdleEvent and _gfx_handle used
// below are created there
Init_gfx;
if (Flip<>nil) then
begin
node^.Flip:=Flip^;
end;
// mark the GPU busy before the work becomes visible to the GFX thread
ResetEvent(FIdleEvent);
GFXRing.Queue.Push(node);
// queue the empty APC to the GFX thread
// NOTE(review): presumably this ends its alertable sleep - confirm
NtQueueApcThread(_gfx_handle,@_apc_null,nil,nil,0);
end;
//Waits for the GFX thread's idle event. While the wait is running
//FSubmitAllowed is 1, so vSubmitsAllowed returns False; a call made while
//another wait is already in progress returns immediately.
procedure vSubmitDone;
begin
 //nothing to wait for before the idle event exists
 if (FIdleEvent=0) then Exit;

 //only one waiter at a time
 if not CAS(FSubmitAllowed,0,1) then Exit;

 NtWaitForSingleObject(FIdleEvent,False,nil); //wait idle GPU
 store_release(FSubmitAllowed,0);
end;
//True unless a vSubmitDone wait is currently in progress.
function vSubmitsAllowed:Boolean;
begin
 Result:=(FSubmitAllowed=0);
end;
//Flip without label and without EOP interrupt: the command buffer recorded
//so far is queued first, then the flip of the current submission.
procedure onPrepareFlip();
begin
 GFXMicroEngine.PushCmd(GFXRing.CmdBuffer);

 GFXMicroEngine.PushFlip(GFXRing.Current^.Flip,
                         0,
                         nil,
                         False);
end;
//Flip with label write, without EOP interrupt. The label address is built
//from the two 32-bit halves carried in the packet body.
procedure onPrepareFlipLabel(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4PrepareFlip);
var
 labelAdr:PDWORD;
begin
 labelAdr:=PDWORD(QWORD(Body^.ADDRES_LO) or (QWORD(Body^.ADDRES_HI) shl 32));

 {$ifdef ww}Writeln('adr:',HexStr(labelAdr),' data:',HexStr(Body^.DATA,8));{$endif}

 GFXMicroEngine.PushCmd(GFXRing.CmdBuffer);

 GFXMicroEngine.PushFlip(GFXRing.Current^.Flip,
                         Body^.DATA,
                         labelAdr,
                         False);
end;
//Flip without label, with EOP interrupt.
procedure onPrepareFlipWithEopInterrupt(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4PrepareFlipWithEopInterrupt);
begin
 {$ifdef ww}writeln;{$endif}

 GFXMicroEngine.PushCmd(GFXRing.CmdBuffer);

 GFXMicroEngine.PushFlip(GFXRing.Current^.Flip,
                         0,
                         nil,
                         True);
end;
//Flip with label write and EOP interrupt. The label address is built from
//the two 32-bit halves carried in the packet body.
procedure onPrepareFlipWithEopInterruptLabel(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4PrepareFlipWithEopInterrupt);
var
 labelAdr:PDWORD;
begin
 labelAdr:=PDWORD(QWORD(Body^.ADDRES_LO) or (QWORD(Body^.ADDRES_HI) shl 32));

 {$ifdef ww}Writeln('adr:',HexStr(labelAdr),' data:',HexStr(Body^.DATA,8));{$endif}

 GFXMicroEngine.PushCmd(GFXRing.CmdBuffer);

 GFXMicroEngine.PushFlip(GFXRing.Current^.Flip,
                         Body^.DATA,
                         labelAdr,
                         True);
end;
// Handles the EVENT_WRITE_EOP packet. Unless the destination is TC L2
// (then nothing is done), the command buffer recorded so far is queued,
// followed by an end-of-pipe write node for the address/data in the packet.
// Tracing output is compiled in only when `ww` is defined.
procedure onEventWriteEop(pm4Hdr:PM4_TYPE_3_HEADER;Body:PEVENTWRITEEOP);
var
adr:Pointer;
begin
Assert(Body^.EVENT_CNTL.EVENT_INDEX=EVENT_WRITE_INDEX_ANY_EOP_TIMESTAMP);
{$ifdef ww}
Case Body^.EVENT_CNTL.EVENT_TYPE of
kEopFlushCbDbCaches :Writeln('kEopFlushCbDbCaches');
kEopFlushAndInvalidateCbDbCaches:Writeln('kEopFlushAndInvalidateCbDbCaches');
kEopCbDbReadsDone :Writeln('kEopCbDbReadsDone');
else
Assert(false);
end;
{$endif}
if (Body^.DATA_CNTL.destTcL2<>0) then
begin
{$ifdef ww}Writeln('kEventWriteDestTcL2');{$endif}
end else
begin
// destination address = ADDRESS_LO plus the high bits kept in DATA_CNTL
QWORD(adr):=QWORD(Body^.ADDRESS_LO) or (QWORD(Body^.DATA_CNTL.addressHi) shl $20);
{$ifdef ww}
Case Body^.DATA_CNTL.dataSel of
kEventWriteSource32BitsImmediate :Writeln('kEventWriteSource32BitsImmediate adr:',HexStr(adr),' data:',HexStr(Body^.DATA_LO,8));
kEventWriteSource64BitsImmediate :Writeln('kEventWriteSource64BitsImmediate adr:',HexStr(adr),' data:',HexStr(PQWORD(@Body^.DATA_LO)^,16));
kEventWriteSourceGlobalClockCounter :Writeln('kEventWriteSourceGlobalClockCounter adr:',HexStr(adr),' data:',GetTickCount64*1000);
kEventWriteSourceGpuCoreClockCounter:Writeln('kEventWriteSourceGpuCoreClockCounter adr:',HexStr(adr),' data:',GetTickCount64*1000);
else;
end;
{$endif}
GFXMicroEngine.PushCmd(GFXRing.CmdBuffer);
// the interrupt flag is forced to True; the intSel test is commented out
GFXMicroEngine.PushEop(adr,PQWORD(@Body^.DATA_LO)^,Body^.DATA_CNTL.dataSel,True{(Body^.DATA_CNTL.intSel<>0)});
end;
end;
// Handles the EVENT_WRITE_EOS packet. Only the CSDONE/PSDONE event index
// with the "store 32-bit data to memory" command is implemented: it records
// a writeAtEndOfShader into the current command buffer. Everything else
// trips an assertion.
procedure onEventWriteEos(pm4Hdr:PM4_TYPE_3_HEADER;Body:PTPM4CMDEVENTWRITEEOS);
var
adr:PDWORD;
begin
{$ifdef ww}
Case Body^.eventType of
CS_DONE:Writeln(' CS_DONE');
PS_DONE:Writeln(' PS_DONE');
else
Assert(False,IntToStr(Body^.eventType));
end;
{$endif}
Case Body^.eventIndex of
EVENT_WRITE_EOS_INDEX_CSDONE_PSDONE:
begin
Case Body^.command of
//EVENT_WRITE_EOS_CMD_STORE_APPEND_COUNT_TO_MEMORY:;
//EVENT_WRITE_EOS_CMD_STORE_GDS_DATA_TO_MEMORY :;
EVENT_WRITE_EOS_CMD_STORE_32BIT_DATA_TO_MEMORY :
begin
// destination address from its two 32-bit halves
QWORD(adr):=QWORD(Body^.addressLo) or (QWORD(Body^.addressHi) shl $20);
{$ifdef ww}Writeln('adr:',HexStr(adr),' data:',Body^.DATA){$endif};
// make sure a command buffer exists before recording into it
GFXRing.AllocCmdBuffer;
GFXRing.CmdBuffer.writeAtEndOfShader(Body^.eventType,adr,Body^.DATA);
end;
else
Assert(False);
end;
end;
else
Assert(False);
end;
end;
// Handles the EVENT_WRITE packet. The packet has no effect on the emulated
// state here: the whole body is tracing code that exists only when `ww` is
// defined (and then asserts on unknown event types/indices).
procedure onEventWrite(pm4Hdr:PM4_TYPE_3_HEADER;Body:PTPM4CMDEVENTWRITE);
begin
{$ifdef ww}
Case Body^.eventType of
CACHE_FLUSH_AND_INV_EVENT :Writeln(' FLUSH_AND_INV_EVENT');
FLUSH_AND_INV_CB_PIXEL_DATA:Writeln(' FLUSH_AND_INV_CB_PIXEL_DATA');
FLUSH_AND_INV_DB_DATA_TS :Writeln(' FLUSH_AND_INV_DB_DATA_TS');
FLUSH_AND_INV_DB_META :Writeln(' FLUSH_AND_INV_DB_META');
FLUSH_AND_INV_CB_DATA_TS :Writeln(' FLUSH_AND_INV_CB_DATA_TS');
FLUSH_AND_INV_CB_META :Writeln(' FLUSH_AND_INV_CB_META');
THREAD_TRACE_MARKER :Writeln(' THREAD_TRACE_MARKER');
else
Assert(False,IntToStr(Body^.eventType));
end;
Case Body^.EVENTINDEX of
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP :Writeln(' ANY_NON_TIMESTAMP');
EVENT_WRITE_INDEX_ZPASS_DONE :Writeln(' ZPASS_DONE');
EVENT_WRITE_INDEX_SAMPLE_PIPELINESTAT :Writeln(' SAMPLE_PIPELINESTATS');
EVENT_WRITE_INDEX_SAMPLE_STREAMOUTSTATS:Writeln(' SAMPLE_STREAMOUTSTAT[S|S1|S2|S3]');
EVENT_WRITE_INDEX_VS_PS_PARTIAL_FLUSH :Writeln(' [CS|VS|PS]_PARTIAL_FLUSH');
EVENT_WRITE_INDEX_ANY_EOP_TIMESTAMP :Writeln(' ANY_EOP_TIMESTAMP');
EVENT_WRITE_INDEX_ANY_EOS_TIMESTAMP :Writeln(' ANY_EOS_TIMESTAMP');
EVENT_WRITE_INDEX_CACHE_FLUSH_EVENT :Writeln(' CACHE_FLUSH, CACHE_FLUSH_AND_INV_EVENT');
else
Assert(False);
end;
{$endif}
end;
//PFP_SYNC_ME: on hardware the PFP waits here until the ME is idle.
//Nothing is done for it in this implementation.
procedure onPfpSyncMe(pm4Hdr:PM4_TYPE_3_HEADER;Body:Pointer);
begin
 //wait ME idle in PFP
end;
// Handles the DMA_DATA packet.
// Implemented combinations:
//  memory -> memory : copy, recorded into the command buffer (ME engine)
//                     or done right away with Move (PFP engine)
//  data   -> memory : DWORD fill with srcAddrLo, recorded (ME) or done
//                     right away with FillDWORD (PFP)
// Register/GDS destinations are only traced (when `ww` is defined), except
// memory -> register with a destination other than $3022C, which asserts
// as TODO. Unknown selectors trip an assertion.
procedure onDMAData(pm4Hdr:PM4_TYPE_3_HEADER;Body:PTPM4DMADATA);
var
adrSrc,adrDst:PDWORD;
srcSel,dstSel:DWORD;
begin
// source/destination selectors are assembled from bit fields of body
// DWORD 0 and DWORD 5
srcSel:=((PDWORD(Body)[0] shr $1d) and 3) or ((PDWORD(Body)[5] shr $19) and 8) or ((PDWORD(Body)[5] shr $18) and 4);
dstSel:=((PDWORD(Body)[0] shr $14) and 1) or ((PDWORD(Body)[5] shr $1a) and 8) or ((PDWORD(Body)[5] shr $19) and 4);
QWORD(adrSrc):=QWORD(Body^.srcAddrLo) or (QWORD(Body^.srcAddrHi) shl $20);
QWORD(adrDst):=QWORD(Body^.dstAddrLo) or (QWORD(Body^.dstAddrHi) shl $20);
//Flags1.cpSync isBlocking
case srcSel of
kDmaDataSrcMemory,
kDmaDataSrcMemoryUsingL2:
begin
case dstSel of
kDmaDataDstMemory:
begin
// NOTE: an engine value other than ME/PFP is silently ignored
Case Body^.Flags1.engine of
CP_DMA_ENGINE_ME:
begin
GFXRing.AllocCmdBuffer;
GFXRing.CmdBuffer.dmaData(adrSrc,adrDst,Body^.Flags2.byteCount,Boolean(Body^.Flags1.cpSync));
//Move(adrSrc^,adrDst^,Body^.Flags2.byteCount);
end;
CP_DMA_ENGINE_PFP:
begin
Move(adrSrc^,adrDst^,Body^.Flags2.byteCount);
end;
end;
end;
kDmaDataDstRegister,
kDmaDataDstRegisterNoIncrement:
begin
// destination $3022C is treated as an L2 prefetch request (trace only)
if (Body^.dstAddrLo=$3022C) then
begin
{$ifdef ww}Writeln('prefetchIntoL2:',HexStr(adrSrc),' count(DW):',Body^.Flags2.byteCount div 4){$endif};
end else
begin
{$ifdef ww}Writeln('SetRegister:',HexStr(Body^.dstAddrLo shr 2,4),' count(DW):',Body^.Flags2.byteCount div 4){$endif};
Assert(false,'TODO');
end;
end;
else
Assert(False);
end;
end;
kDmaDataSrcData:
begin
// srcAddrLo carries the immediate fill value in this mode
case dstSel of
kDmaDataDstMemory:
begin
// NOTE: an engine value other than ME/PFP is silently ignored
Case Body^.Flags1.engine of
CP_DMA_ENGINE_ME:
begin
GFXRing.AllocCmdBuffer;
GFXRing.CmdBuffer.dmaData(Body^.srcAddrLo,adrDst,Body^.Flags2.byteCount,Boolean(Body^.Flags1.cpSync));
//FillDWORD(adrDst^,Body^.Flags2.byteCount div 4,Body^.srcAddrLo);
end;
CP_DMA_ENGINE_PFP:
begin
FillDWORD(adrDst^,Body^.Flags2.byteCount div 4,Body^.srcAddrLo);
end;
end;
end;
// the three destinations below are trace-only; without `ww` each label is
// followed by an empty statement
kDmaDataDstRegister:
{$ifdef ww}Writeln('SetRegister:',HexStr(Body^.dstAddrLo shr 2,4),' count(DW):',Body^.Flags2.byteCount div 4){$endif};
kDmaDataDstRegisterNoIncrement:
{$ifdef ww}Writeln('SetRegisterNoInc:',HexStr(Body^.dstAddrLo shr 2,4),' count(DW):',Body^.Flags2.byteCount div 4){$endif};
kDmaDataDstGds: //max size = kGdsAccessibleMemorySizeInBytes = 0xBF00
{$ifdef ww}Writeln('SetGds:',HexStr(Body^.dstAddrLo,4),' count(DW):',Body^.Flags2.byteCount div 4){$endif};
else
Assert(False);
end;
end;
else
Assert(False);
end;
end;
//Handles the WRITE_DATA packet for memory destinations: the inline DWORDs
//following the control word and the two address words are written to the
//destination address, either through the command buffer (ME engine) or
//immediately (PFP engine). Packets with a header count below 3 are ignored;
//other destinations and engines trip an assertion.
procedure onWriteData(pm4Hdr:PM4_TYPE_3_HEADER;Body:PTPM4CMDWRITEDATA);
var
 dst:PDWORD;
 dwCount:Word;
begin
 Assert(Body^.CONTROL.wrOneAddr=0);

 Case Body^.CONTROL.dstSel of
  WRITE_DATA_DST_SEL_MEMORY_SYNC,  //writeDataInline
  WRITE_DATA_DST_SEL_TCL2,         //writeDataInlineThroughL2
  WRITE_DATA_DST_SEL_MEMORY_ASYNC:
   begin
    dwCount:=pm4Hdr.count;

    //nothing to write
    if (dwCount<3) then Exit;

    //number of payload DWORDs
    dwCount:=dwCount-2;

    dst:=PDWORD(QWORD(Body^.dstAddrLo) or (QWORD(Body^.dstAddrHi) shl 32));

    {$ifdef ww}Writeln('adr:',HexStr(dst),' data:',PDWORD(@Body^.DATA)^){$endif};

    Case Body^.CONTROL.engineSel of
     WRITE_DATA_ENGINE_ME:
      begin
       GFXRing.AllocCmdBuffer;
       GFXRing.CmdBuffer.dmaData(@Body^.DATA,dst,dwCount*SizeOf(DWORD),Boolean(Body^.CONTROL.wrConfirm));
       //Move(Body^.DATA,dst^,dwCount*SizeOf(DWORD));
      end;
     WRITE_DATA_ENGINE_PFP:
      begin
       Move(Body^.DATA,dst^,dwCount*SizeOf(DWORD));
      end;
     else
      Assert(False);
    end;
   end;
  else
   Assert(False);
 end;
end;
//ACQUIRE_MEM packet (vkFlushMappedMemoryRanges analog).
//No action is taken; only an empty trace line is printed when `ww` is defined.
procedure onAcquireMem(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4ACQUIREMEM);
begin
 {$ifdef ww}writeln;{$endif}
end;
//Handles the WAIT_REG_MEM packet. Only a wait on a memory address executed
//by the ME is implemented: the command buffer recorded so far is queued,
//followed by a wait node. Any other engine/space combination trips an
//assertion and is otherwise ignored.
procedure onWaitRegMem(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDWAITREGMEM);
var
 pollAdr:Pointer;
begin
 {$ifdef ww}
 Case Body^.engine of
  WAIT_REG_MEM_ENGINE_ME:
   Case Body^.memSpace of
    WAIT_REG_MEM_SPACE_REGISTER:Writeln(' waitOnRegister');
    WAIT_REG_MEM_SPACE_MEMORY  :Writeln(' waitOnAddress');
    else
     Assert(false);
   end;
  WAIT_REG_MEM_ENGINE_PFP:
   Case Body^.memSpace of
    WAIT_REG_MEM_SPACE_REGISTER:Writeln(' waitOnRegisterAndStall');
    WAIT_REG_MEM_SPACE_MEMORY  :Writeln(' waitOnAddressAndStall');
    else
     Assert(false);
   end;
  else
   Assert(false);
 end;
 {$endif}

 //unsupported engine
 if (Body^.engine<>WAIT_REG_MEM_ENGINE_ME) then
 begin
  Assert(false);
  Exit;
 end;

 //unsupported space
 if (Body^.memSpace<>WAIT_REG_MEM_SPACE_MEMORY) then
 begin
  Assert(false);
  Exit;
 end;

 pollAdr:=Pointer(QWORD(Body^.pollAddressLo) or (QWORD(Body^.pollAddressHi) shl 32));

 GFXMicroEngine.PushCmd(GFXRing.CmdBuffer);
 GFXMicroEngine.PushWaitMem(pollAdr,Body^.reference,Body^.mask,Body^.compareFunc);
end;
//The first 32KB of the register space are system registers; graphics and
//multimedia registers lie beyond that. There is no SET_* type packet for
//the system registers, so TYPE-0 packets are used to write them.
//The packet is only traced (when `ww` is defined), no register is written.
procedure onPm40(pm4Hdr:PM4_TYPE_0_HEADER;Body:PDWORD);
begin
 //set system registers
 {$ifdef ww}Writeln('PM4_TYPE_0:Reg:',HexStr(pm4Hdr.baseIndex,4),' count(DW):',pm4Hdr.count+1);{$endif}
end;
//TYPE-2 packet: no action, traced only when `ww` is defined.
procedure onPm42(pm4Hdr:PM4_TYPE_2_HEADER);
begin
 {$ifdef ww}Writeln('PM4_TYPE_2');{$endif}
end;
//Push-marker hint: prints the marker text when `ww` is defined, else no-op.
procedure onPushMarker(Body:PChar);
begin
 {$ifdef ww}Writeln('\HINT_PUSH_MARKER:',Body);{$endif}
end;
//Set-marker hint: prints the marker text when `ww` is defined, else no-op.
procedure onSetMarker(Body:PChar);
begin
 {$ifdef ww}Writeln('\HINT_SET_MARKER:',Body);{$endif}
end;
//Prints the two WORDs at Body when `ww` is defined, else no-op.
//Called by onNop for NOP packets that follow certain register writes.
procedure onWidthHeight(Body:PWORD);
begin
 {$ifdef ww}Writeln('\HINT_',Body[0],'_',Body[1]);{$endif}
end;
// Handles the NOP packet, which carries out-of-band "hints".
//  * If the last register written through SetContextReg is one of the
//    FMASK_SLICE / DCC_BASE / HTILE_SURFACE registers listed below, the
//    body is passed to onWidthHeight as two WORDs and nothing else is done.
//  * Otherwise the first body DWORD selects the hint. Only the four
//    PREPARE_FLIP hints have an effect; all other hints are merely traced
//    when `ww` is defined and ignored otherwise.
procedure onNop(pm4Hdr:PM4_TYPE_3_HEADER;Body:PDWORD);
begin
Case GFXRing.LastSetReg of
mmCB_COLOR0_FMASK_SLICE,
mmCB_COLOR1_FMASK_SLICE,
mmCB_COLOR2_FMASK_SLICE,
mmCB_COLOR3_FMASK_SLICE,
mmCB_COLOR4_FMASK_SLICE,
mmCB_COLOR5_FMASK_SLICE,
mmCB_COLOR6_FMASK_SLICE,
mmCB_COLOR7_FMASK_SLICE,
mmCB_COLOR0_DCC_BASE,
mmCB_COLOR1_DCC_BASE,
mmCB_COLOR2_DCC_BASE,
mmCB_COLOR3_DCC_BASE,
mmCB_COLOR4_DCC_BASE,
mmCB_COLOR5_DCC_BASE,
mmCB_COLOR6_DCC_BASE,
mmCB_COLOR7_DCC_BASE,
mmDB_HTILE_SURFACE:
begin
onWidthHeight(PWORD(Body));
Exit;
end;
end;
// the first DWORD of the body is the hint opcode
case Body^ of
{$ifdef ww}
OP_HINT_UPDATE_PS_DB_CONTROL :Writeln('\HINT_UPDATE_PS_DB_CONTROL');
OP_HINT_UPDATE_VS_OUT_CNTL :Writeln('\HINT_UPDATE_VS_OUT_CNTL');
OP_HINT_UPDATE_PS_FORMAT :Writeln('\HINT_UPDATE_PS_FORMAT');
OP_HINT_UPDATE_PS_INPUT :Writeln('\HINT_UPDATE_PS_INPUT');
OP_HINT_UPDATE_PS_IN_CONTROL :Writeln('\HINT_UPDATE_PS_IN_CONTROL');
OP_HINT_UPDATE_VS_OUT_CONFIG :Writeln('\HINT_UPDATE_VS_OUT_CONFIG');
OP_HINT_UPDATE_PS_RSRC :Writeln('\HINT_UPDATE_PS_RSRC');
OP_HINT_UPDATE_PS_BARY_CNTL :Writeln('\HINT_UPDATE_PS_BARY_CNTL');
OP_HINT_UPDATE_VS_RSRC :Writeln('\HINT_UPDATE_VS_RSRC');
OP_HINT_UPDATE_VS_POS_FORMAT :Writeln('\HINT_UPDATE_VS_POS_FORMAT');
OP_HINT_WRITE_GPU_PREFETCH_INTO_L2 :Writeln('\HINT_WRITE_GPU_PREFETCH_INTO_L2');
OP_HINT_BASE_ALLOCATE_FROM_COMMAND_BUFFER :Writeln('\HINT_BASE_ALLOCATE_FROM_COMMAND_BUFFER');
OP_HINT_PUSH_MARKER :
onPushMarker(@Body[1]);
OP_HINT_POP_MARKER :Writeln('\HINT_POP_MARKER');
OP_HINT_SET_MARKER:
onSetMarker(@Body[1]);
OP_HINT_SET_VSHARP_IN_USER_DATA :Writeln('\HINT_SET_VSHARP_IN_USER_DATA');
OP_HINT_SET_TSHARP_IN_USER_DATA :Writeln('\HINT_SET_TSHARP_IN_USER_DATA');
OP_HINT_SET_SSHARP_IN_USER_DATA :Writeln('\HINT_SET_SSHARP_IN_USER_DATA');
OP_HINT_SET_USER_DATA_REGION :Writeln('\HINT_SET_USER_DATA_REGION');
OP_HINT_BASE_MARK_DISPATCH_DRAW_ACB_ADDRESS :Writeln('\HINT_BASE_MARK_DISPATCH_DRAW_ACB_ADDRESS');
OP_HINT_INLINE_DATA1 :Writeln('\HINT_INLINE_DATA1');
OP_HINT_INLINE_DATA2 :Writeln('\HINT_INLINE_DATA2');
OP_HINT_SET_DB_RENDER_CONTROL :Writeln('\HINT_SET_DB_RENDER_CONTROL');
OP_HINT_SET_DB_COUNT_CONTROL :Writeln('\HINT_SET_DB_COUNT_CONTROL');
OP_HINT_SET_RENDER_OVERRIDE_CONTROL :Writeln('\HINT_SET_RENDER_OVERRIDE_CONTROL');
OP_HINT_SET_RENDER_OVERRIDE2CONTROL :Writeln('\HINT_SET_RENDER_OVERRIDE2CONTROL');
OP_HINT_SET_PS_SHADER_SAMPLE_EXCLUSION_MASK :Writeln('\HINT_SET_PS_SHADER_SAMPLE_EXCLUSION_MASK');
OP_HINT_SET_DEPTH_BOUNDS_RANGE :Writeln('\HINT_SET_DEPTH_BOUNDS_RANGE');
OP_HINT_SET_STENCIL_CLEAR_VALUE :Writeln('\HINT_SET_STENCIL_CLEAR_VALUE');
OP_HINT_SET_DEPTH_CLEAR_VALUE :Writeln('\HINT_SET_DEPTH_CLEAR_VALUE');
OP_HINT_SET_SCREEN_SCISSOR :Writeln('\HINT_SET_SCREEN_SCISSOR');
OP_HINT_SET_DEPTH_RENDER_TARGET :Writeln('\HINT_SET_DEPTH_RENDER_TARGET');
OP_HINT_SET_BORDER_COLOR_TABLE_ADDR :Writeln('\HINT_SET_BORDER_COLOR_TABLE_ADDR');
OP_HINT_SET_WINDOW_OFFSET :Writeln('\HINT_SET_WINDOW_OFFSET');
OP_HINT_SET_WINDOW_SCISSOR :Writeln('\HINT_SET_WINDOW_SCISSOR');
OP_HINT_SET_CLIP_RECTANGLE_RULE :Writeln('\HINT_SET_CLIP_RECTANGLE_RULE');
OP_HINT_SET_HARDWARE_SCREEN_OFFSET :Writeln('\HINT_SET_HARDWARE_SCREEN_OFFSET');
OP_HINT_SET_RENDER_TARGET_MASK :Writeln('\HINT_SET_RENDER_TARGET_MASK');
OP_HINT_SET_GENERIC_SCISSOR :Writeln('\HINT_SET_GENERIC_SCISSOR');
OP_HINT_SET_PERFMON_ENABLE :Writeln('\HINT_SET_PERFMON_ENABLE');
OP_HINT_SET_SCALED_RESOLUTION_GRID :Writeln('\HINT_SET_SCALED_RESOLUTION_GRID');
OP_HINT_SET_FOVEATED_WINDOW :Writeln('\HINT_SET_FOVEATED_WINDOW');
OP_HINT_SET_INDEX_OFFSET :Writeln('\HINT_SET_INDEX_OFFSET');
OP_HINT_SET_PRIMITIVE_RESET_INDEX :Writeln('\HINT_SET_PRIMITIVE_RESET_INDEX');
OP_HINT_SET_STENCIL_OP_CONTROL :Writeln('\HINT_SET_STENCIL_OP_CONTROL');
OP_HINT_SET_STENCIL :Writeln('\HINT_SET_STENCIL');
OP_HINT_SET_PS_SHADER_USAGE :Writeln('\HINT_SET_PS_SHADER_USAGE');
OP_HINT_SET_GRAPHICS_SCRATCH_SIZE :Writeln('\HINT_SET_GRAPHICS_SCRATCH_SIZE');
OP_HINT_SET_DEPTH_STENCIL_CONTROL :Writeln('\HINT_SET_DEPTH_STENCIL_CONTROL');
OP_HINT_SET_DEPTH_EQAA_CONTROL :Writeln('\HINT_SET_DEPTH_EQAA_CONTROL');
OP_HINT_SET_CB_CONTROL :Writeln('\HINT_SET_CB_CONTROL');
OP_HINT_SET_CLIP_CONTROL :Writeln('\HINT_SET_CLIP_CONTROL');
OP_HINT_SET_PRIMITIVE_SETUP :Writeln('\HINT_SET_PRIMITIVE_SETUP');
OP_HINT_SET_VIEWPORT_TRANSFORM_CONTROL :Writeln('\HINT_SET_VIEWPORT_TRANSFORM_CONTROL');
OP_HINT_SET_OBJECT_ID_MODE :Writeln('\HINT_SET_OBJECT_ID_MODE');
OP_HINT_SET_COMPUTE_SHADER_CONTROL :Writeln('\HINT_SET_COMPUTE_SHADER_CONTROL');
OP_HINT_SET_COMPUTE_SCRATCH_SIZE :Writeln('\HINT_SET_COMPUTE_SCRATCH_SIZE');
OP_HINT_SET_PRIMITIVE_TYPE_BASE :Writeln('\HINT_SET_PRIMITIVE_TYPE_BASE');
OP_HINT_SET_POINT_SIZE :Writeln('\HINT_SET_POINT_SIZE');
OP_HINT_SET_POINT_MIN_MAX :Writeln('\HINT_SET_POINT_MIN_MAX');
OP_HINT_SET_LINE_WIDTH :Writeln('\HINT_SET_LINE_WIDTH');
OP_HINT_SET_GS_MODE :Writeln('\HINT_SET_GS_MODE');
OP_HINT_SET_GS_ON_CHIP_CONTROL :Writeln('\HINT_SET_GS_ON_CHIP_CONTROL');
OP_HINT_SET_SCAN_MODE_CONTROL :Writeln('\HINT_SET_SCAN_MODE_CONTROL');
OP_HINT_SET_PS_SHADER_RATE :Writeln('\HINT_SET_PS_SHADER_RATE');
OP_HINT_SET_PRIMITIVE_ID_ENABLE :Writeln('\HINT_SET_PRIMITIVE_ID_ENABLE');
OP_HINT_SET_PRIMITIVE_RESET_INDEX_ENABLE :Writeln('\HINT_SET_PRIMITIVE_RESET_INDEX_ENABLE');
OP_HINT_SET_DRAW_PAYLOAD_CONTROL :Writeln('\HINT_SET_DRAW_PAYLOAD_CONTROL');
OP_HINT_SET_INSTANCE_STEP_RATE :Writeln('\HINT_SET_INSTANCE_STEP_RATE');
OP_HINT_SETUP_ES_GS_RING_REGISTERS :Writeln('\HINT_SETUP_ES_GS_RING_REGISTERS');
OP_HINT_SET_VERTEX_REUSE_ENABLE :Writeln('\HINT_SET_VERTEX_REUSE_ENABLE');
OP_HINT_SET_HTILE_STENCIL0 :Writeln('\HINT_SET_HTILE_STENCIL0');
OP_HINT_SET_HTILE_STENCIL1 :Writeln('\HINT_SET_HTILE_STENCIL1');
OP_HINT_SETUP_DRAW_OPAQUE_PARAMETERS_1 :Writeln('\HINT_SETUP_DRAW_OPAQUE_PARAMETERS_1');
OP_HINT_SETUP_DRAW_OPAQUE_PARAMETERS_0 :Writeln('\HINT_SETUP_DRAW_OPAQUE_PARAMETERS_0');
OP_HINT_SET_TESSELLATION_DISTRIBUTION_THRESHOLDS :Writeln('\HINT_SET_TESSELLATION_DISTRIBUTION_THRESHOLDS');
OP_HINT_SET_ACTIVE_SHADER_STAGES :Writeln('\HINT_SET_ACTIVE_SHADER_STAGES');
OP_HINT_SETUP_GS_VS_RING_REGISTERS :Writeln('\HINT_SETUP_GS_VS_RING_REGISTERS');
OP_HINT_SET_ALPHA_TO_MASK_CONTROL :Writeln('\HINT_SET_ALPHA_TO_MASK_CONTROL');
OP_HINT_SET_DISPATCH_DRAW_INDEX_DEALLOCATION_MASK:Writeln('\HINT_SET_DISPATCH_DRAW_INDEX_DEALLOCATION_MASK');
OP_HINT_SET_POLYGON_OFFSET_Z_FORMAT :Writeln('\HINT_SET_POLYGON_OFFSET_Z_FORMAT');
OP_HINT_SET_POLYGON_OFFSET_CLAMP :Writeln('\HINT_SET_POLYGON_OFFSET_CLAMP');
OP_HINT_SET_POLYGON_OFFSET_FRONT :Writeln('\HINT_SET_POLYGON_OFFSET_FRONT');
OP_HINT_SET_POLYGON_OFFSET_BACK :Writeln('\HINT_SET_POLYGON_OFFSET_BACK');
OP_HINT_SET_GS_MODE_DISABLE :Writeln('\HINT_SET_GS_MODE_DISABLE');
OP_HINT_SET_STREAMOUT_MAPPING :Writeln('\HINT_SET_STREAMOUT_MAPPING');
OP_HINT_SET_AA_SAMPLE_COUNT :Writeln('\HINT_SET_AA_SAMPLE_COUNT');
OP_HINT_SET_VERTEX_QUANTIZATION :Writeln('\HINT_SET_VERTEX_QUANTIZATION');
OP_HINT_SET_GUARD_BANDS :Writeln('\HINT_SET_GUARD_BANDS');
OP_HINT_SET_AA_SAMPLE_MASK1 :Writeln('\HINT_SET_AA_SAMPLE_MASK1');
OP_HINT_SET_AA_SAMPLE_MASK2 :Writeln('\HINT_SET_AA_SAMPLE_MASK2');
OP_HINT_SET_TEXTURE_GRADIENT_FACTORS :Writeln('\HINT_SET_TEXTURE_GRADIENT_FACTORS');
OP_HINT_SET_PERF_COUNTER_CONTROL_PA :Writeln('\HINT_SET_PERF_COUNTER_CONTROL_PA');
OP_HINT_SET_PRIMITIVE_TYPE_NEO :Writeln('\HINT_SET_PRIMITIVE_TYPE_NEO');
{$endif}
// the flip hints below are always compiled in; their payload (if any)
// starts at the second body DWORD
OP_HINT_PREPARE_FLIP_VOID:
begin
onPrepareFlip();
{$ifdef ww}Writeln('\HINT_PREPARE_FLIP_VOID');{$endif}
end;
OP_HINT_PREPARE_FLIP_LABEL:
begin
{$ifdef ww}Writeln('\HINT_PREPARE_FLIP_LABEL');{$endif}
onPrepareFlipLabel(pm4Hdr,@Body[1]);
end;
OP_HINT_PREPARE_FLIP_WITH_EOP_INTERRUPT_VOID:
begin
{$ifdef ww}Writeln('\HINT_PREPARE_FLIP_WITH_EOP_INTERRUPT_VOID');{$endif}
onPrepareFlipWithEopInterrupt(pm4Hdr,@Body[1]);
end;
OP_HINT_PREPARE_FLIP_WITH_EOP_INTERRUPT_LABEL:
begin
{$ifdef ww}Writeln('\HINT_PREPARE_FLIP_WITH_EOP_INTERRUPT_LABEL');{$endif}
onPrepareFlipWithEopInterruptLabel(pm4Hdr,@Body[1]);
end;
// unknown hints are traced with `ww` and silently ignored without it
{$ifdef ww}else
Writeln('\Hint:',HexStr(Body^,8));{$endif}
end;
end;
//CONTEXT_CONTROL packet: no action is taken; only an empty trace line is
//printed when `ww` is defined.
procedure onContextControl(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDCONTEXTCONTROL);
begin
 {$ifdef ww}writeln;{$endif}
end;
//CLEAR_STATE packet.
//Its purpose is to shorten the command buffer preamble for all driver
//versions of both DX and OpenGL, and specifically to support the Display
//List requirements of DX11. "Clear state" essentially means: everything
//off, all resources NULL, all other values at a defined default.
//Here it is implemented by clearing the shadowed register state.
procedure onClearState(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDCLEARSTATE);
begin
 GPU_REGS.Clear;
end;
//Stores one context register write into the shadow state kept in GPU_REGS.
// reg   - absolute register index (compared against the mm* constants)
// value - raw 32-bit value to store
//The register index is always remembered in GFXRing.LastSetReg.
//Registers without a matching case label are not stored; with the "ww"
//define they are logged instead.
//Range labels (a..b) write into the target structure as a flat DWORD array
//indexed by (reg - first register of the range).
//NOTE(review): this presumes the field layout of those GPU_REGS structures
//mirrors the hardware register order - confirm against ps4_gpu_regs.
procedure SetContextReg(reg:WORD;value:DWORD);
begin
GFXRing.LastSetReg:=reg;
Case reg of
//color buffer (render target) registers
mmCB_COLOR0_BASE..mmCB_COLOR7_DCC_BASE:
begin
PDWORD(@GPU_REGS.RENDER_TARGET)[reg-mmCB_COLOR0_BASE]:=value;
end;
mmCB_TARGET_MASK :DWORD(GPU_REGS.TARGET_MASK) :=value;
mmCB_COLOR_CONTROL:DWORD(GPU_REGS.CB_COLOR_CONTROL):=value;
mmCB_BLEND_RED..mmCB_BLEND_ALPHA:
begin
PDWORD(@GPU_REGS.CB_BLEND_RGBA)[reg-mmCB_BLEND_RED]:=value;
end;
mmCB_BLEND0_CONTROL..mmCB_BLEND7_CONTROL:
begin
PDWORD(@GPU_REGS.CB_BLEND_CONTROL)[reg-mmCB_BLEND0_CONTROL]:=value;
end;
mmCB_SHADER_MASK :DWORD(GPU_REGS.SPI.PS.SHADER_MASK):=value;
//PA_SC_*: mode control, scissors, viewport depth ranges
mmPA_SC_MODE_CNTL_0:DWORD(GPU_REGS.SC_MODE_CNTL_0) :=value;
mmPA_SC_MODE_CNTL_1:DWORD(GPU_REGS.SC_MODE_CNTL_1) :=value;
mmPA_SC_GENERIC_SCISSOR_TL:DWORD(GPU_REGS.GENERIC_SCISSOR.TL) :=value;
mmPA_SC_GENERIC_SCISSOR_BR:DWORD(GPU_REGS.GENERIC_SCISSOR.BR) :=value;
mmPA_SC_VPORT_SCISSOR_0_TL..mmPA_SC_VPORT_SCISSOR_15_BR:
begin
PDWORD(@GPU_REGS.VPORT_SCISSOR)[reg-mmPA_SC_VPORT_SCISSOR_0_TL]:=value;
end;
mmPA_SC_VPORT_ZMIN_0..mmPA_SC_VPORT_ZMAX_15:
begin
PDWORD(@GPU_REGS.VPORT_ZMIN_MAX)[reg-mmPA_SC_VPORT_ZMIN_0]:=value;
end;
//PA_CL_* viewport scale/offset
mmPA_CL_VPORT_XSCALE..mmPA_CL_VPORT_ZOFFSET_15:
begin
PDWORD(@GPU_REGS.VPORT_SCALE_OFFSET)[reg-mmPA_CL_VPORT_XSCALE]:=value;
end;
mmPA_CL_VTE_CNTL:DWORD(GPU_REGS.VTE_CNTL):=value;
mmPA_SC_SCREEN_SCISSOR_TL:DWORD(GPU_REGS.SCREEN_SCISSOR_TL):=value;
mmPA_SC_SCREEN_SCISSOR_BR:DWORD(GPU_REGS.SCREEN_SCISSOR_BR):=value;
//anti-aliasing masks, config and sample locations
mmPA_SC_AA_MASK_X0Y0_X1Y0:DWORD(GPU_REGS.SC_AA_MASK_X0Y0_X1Y0):=value;
mmPA_SC_AA_MASK_X0Y1_X1Y1:DWORD(GPU_REGS.SC_AA_MASK_X0Y1_X1Y1):=value;
mmPA_SC_AA_CONFIG :DWORD(GPU_REGS.SC_AA_CONFIG):=value;
mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0..mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3:
begin
PDWORD(@GPU_REGS.SC_AA_SAMPLE_LOCS_PIXEL)[reg-mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0]:=value;
end;
mmPA_SC_CENTROID_PRIORITY_0..mmPA_SC_CENTROID_PRIORITY_1:
begin
PDWORD(@GPU_REGS.SC_CENTROID_PRIORITY)[reg-mmPA_SC_CENTROID_PRIORITY_0]:=value;
end;
//PA_SU_* / PA_CL_* setup and clipping registers
mmPA_SU_HARDWARE_SCREEN_OFFSET:DWORD(GPU_REGS.HARDWARE_SCREEN_OFFSET):=value;
mmPA_SU_VTX_CNTL:DWORD(GPU_REGS.VTX_CNTL):=value;
mmPA_SU_LINE_CNTL:DWORD(GPU_REGS.SU_LINE_CNTL) :=value;
mmPA_SU_POINT_SIZE:DWORD(GPU_REGS.SU_POINT_SIZE) :=value;
mmPA_SU_POINT_MINMAX:DWORD(GPU_REGS.SU_POINT_MINMAX):=value;
mmPA_CL_CLIP_CNTL:DWORD(GPU_REGS.CL_CLIP_CNTL) :=value;
mmPA_SC_CLIPRECT_RULE:DWORD(GPU_REGS.SC_CLIPRECT_RULE):=value;
mmPA_SU_SC_MODE_CNTL:DWORD(GPU_REGS.SC_MODE_CNTL):=value;
//guard band values are stored bit-for-bit through a DWORD pointer
mmPA_CL_GB_VERT_CLIP_ADJ:PDWORD(@GPU_REGS.GB_CLIP.VERT_CLIP_ADJ)^:=value;
mmPA_CL_GB_VERT_DISC_ADJ:PDWORD(@GPU_REGS.GB_CLIP.VERT_DISC_ADJ)^:=value;
mmPA_CL_GB_HORZ_CLIP_ADJ:PDWORD(@GPU_REGS.GB_CLIP.HORZ_CLIP_ADJ)^:=value;
mmPA_CL_GB_HORZ_DISC_ADJ:PDWORD(@GPU_REGS.GB_CLIP.HORZ_DISC_ADJ)^:=value;
//shader interface registers, stored under GPU_REGS.SPI.VS / GPU_REGS.SPI.PS
mmSPI_VS_OUT_CONFIG :DWORD(GPU_REGS.SPI.VS.OUT_CONFIG):=value;
mmPA_CL_VS_OUT_CNTL :DWORD(GPU_REGS.SPI.VS.OUT_CNTL):=value;
mmSPI_SHADER_POS_FORMAT:DWORD(GPU_REGS.SPI.VS.POS_FORMAT):=value;
mmSPI_SHADER_Z_FORMAT :DWORD(GPU_REGS.SPI.PS.Z_FORMAT) :=value;
mmSPI_SHADER_COL_FORMAT:DWORD(GPU_REGS.SPI.PS.COL_FORMAT):=value;
mmSPI_BARYC_CNTL :DWORD(GPU_REGS.SPI.PS.BARYC_CNTL):=value;
mmSPI_PS_INPUT_ENA :DWORD(GPU_REGS.SPI.PS.INPUT_ENA) :=value;
mmSPI_PS_INPUT_ADDR :DWORD(GPU_REGS.SPI.PS.INPUT_ADDR):=value;
mmSPI_PS_IN_CONTROL :DWORD(GPU_REGS.SPI.PS.IN_CONTROL):=value;
mmSPI_PS_INPUT_CNTL_0..mmSPI_PS_INPUT_CNTL_31:
begin
PDWORD(@GPU_REGS.SPI.PS.INPUT_CNTL)[reg-mmSPI_PS_INPUT_CNTL_0]:=value;
end;
//DB_SHADER_CONTROL is kept with the pixel shader state, not under DEPTH
mmDB_SHADER_CONTROL :DWORD(GPU_REGS.SPI.PS.SHADER_CONTROL):=value;
//depth/stencil buffer registers
mmDB_RENDER_CONTROL :DWORD(GPU_REGS.DEPTH.RENDER_CONTROL):=value;
mmDB_DEPTH_CONTROL :DWORD(GPU_REGS.DEPTH.DEPTH_CONTROL):=value;
mmDB_DEPTH_VIEW :DWORD(GPU_REGS.DEPTH.DEPTH_VIEW ):=value;
mmDB_HTILE_DATA_BASE :DWORD(GPU_REGS.DEPTH.HTILE_DATA_BASE ):=value;
mmDB_DEPTH_BOUNDS_MIN :DWORD(GPU_REGS.DEPTH.DEPTH_BOUNDS_MIN ):=value;
mmDB_DEPTH_BOUNDS_MAX :DWORD(GPU_REGS.DEPTH.DEPTH_BOUNDS_MAX ):=value;
mmDB_STENCIL_CLEAR :DWORD(GPU_REGS.DEPTH.STENCIL_CLEAR ):=value;
mmDB_DEPTH_CLEAR :DWORD(GPU_REGS.DEPTH.DEPTH_CLEAR ):=value;
mmDB_DEPTH_INFO :DWORD(GPU_REGS.DEPTH.DEPTH_INFO ):=value;
mmDB_Z_INFO :DWORD(GPU_REGS.DEPTH.Z_INFO ):=value;
mmDB_STENCIL_INFO :DWORD(GPU_REGS.DEPTH.STENCIL_INFO ):=value;
mmDB_Z_READ_BASE :DWORD(GPU_REGS.DEPTH.Z_READ_BASE ):=value;
mmDB_STENCIL_READ_BASE :DWORD(GPU_REGS.DEPTH.STENCIL_READ_BASE ):=value;
mmDB_Z_WRITE_BASE :DWORD(GPU_REGS.DEPTH.Z_WRITE_BASE ):=value;
mmDB_STENCIL_WRITE_BASE:DWORD(GPU_REGS.DEPTH.STENCIL_WRITE_BASE):=value;
mmDB_DEPTH_SIZE :DWORD(GPU_REGS.DEPTH.DEPTH_SIZE ):=value;
mmDB_DEPTH_SLICE :DWORD(GPU_REGS.DEPTH.DEPTH_SLICE ):=value;
mmDB_HTILE_SURFACE :DWORD(GPU_REGS.DEPTH.HTILE_SURFACE ):=value;
mmDB_EQAA :DWORD(GPU_REGS.DEPTH.EQAA):=value;
mmDB_COUNT_CONTROL :DWORD(GPU_REGS.DEPTH.COUNT_CONTROL):=value;
mmDB_STENCIL_CONTROL :DWORD(GPU_REGS.DEPTH.STENCIL_CONTROL ):=value;
mmDB_STENCILREFMASK :DWORD(GPU_REGS.DEPTH.STENCILREFMASK_FF):=value;
mmDB_STENCILREFMASK_BF :DWORD(GPU_REGS.DEPTH.STENCILREFMASK_BF):=value;
//VGT_* geometry/index registers
mmVGT_SHADER_STAGES_EN :DWORD(GPU_REGS.VGT_SHADER_STAGES_EN) :=value;
mmVGT_OUT_DEALLOC_CNTL :DWORD(GPU_REGS.VGT_OUT_DEALLOC_CNTL) :=value;
mmVGT_VTX_CNT_EN :DWORD(GPU_REGS.VGT_VTX_INDX.CNT_EN):=value;
mmVGT_MIN_VTX_INDX :DWORD(GPU_REGS.VGT_VTX_INDX.MIN_INDX):=value;
mmVGT_MAX_VTX_INDX :DWORD(GPU_REGS.VGT_VTX_INDX.MAX_INDX):=value;
mmVGT_INDX_OFFSET :DWORD(GPU_REGS.VGT_VTX_INDX.INDX_OFFSET):=value;
mmVGT_MULTI_PRIM_IB_RESET_EN :DWORD(GPU_REGS.VGT_MULTI_PRIM.IB_RESET_EN):=value;
mmVGT_MULTI_PRIM_IB_RESET_INDX:DWORD(GPU_REGS.VGT_MULTI_PRIM.IB_RESET_INDX):=value;
mmVGT_OUTPUT_PATH_CNTL:DWORD(GPU_REGS.VGT_OUTPUT_PATH_CNTL):=value;
mmVGT_GS_MODE:DWORD(GPU_REGS.VGT_GS_MODE):=value;
mmVGT_GS_PER_ES:DWORD(GPU_REGS.VGT_GS_PER_ES):=value;
mmVGT_ES_PER_GS:DWORD(GPU_REGS.VGT_ES_PER_GS):=value;
mmVGT_GS_PER_VS:DWORD(GPU_REGS.VGT_GS_PER_VS):=value;
mmPA_SU_POLY_OFFSET_DB_FMT_CNTL:DWORD(GPU_REGS.PA_SU_POLY_OFFSET_DB_FMT_CNTL):=value;
mmIA_MULTI_VGT_PARAM:DWORD(GPU_REGS.IA_MULTI_VGT_PARAM):=value;
//every other register is dropped; it is only traced when "ww" is defined
{$ifdef ww}else
Writeln('SetContextReg:',getRegName(reg),'=',HexStr(value,8));{$endif}
end;
end;
//Stores one shader (SH) register write into the shadow state in GPU_REGS.SPI.
// reg   - absolute register index (compared against the mm* constants)
// value - raw 32-bit value to store
//The register index is always remembered in GFXRing.LastSetReg.
//Handled groups: pixel shader (PS), vertex shader (VS) and compute (CS)
//program address, resource words and user data slots 0..15.
//Registers without a matching case label are not stored; with the "ww"
//define they are logged instead.
procedure SetShReg(reg:WORD;value:DWORD);
begin
GFXRing.LastSetReg:=reg;
Case reg of
//pixel shader
mmSPI_SHADER_PGM_LO_PS :GPU_REGS.SPI.PS.LO:=value;
mmSPI_SHADER_PGM_HI_PS :GPU_REGS.SPI.PS.HI:=value;
mmSPI_SHADER_PGM_RSRC1_PS:DWORD(GPU_REGS.SPI.PS.RSRC1):=value;
mmSPI_SHADER_PGM_RSRC2_PS:DWORD(GPU_REGS.SPI.PS.RSRC2):=value;
mmSPI_SHADER_PGM_RSRC3_PS:DWORD(GPU_REGS.SPI.PS.RSRC3):=value;
//user data slots are written as a flat DWORD array indexed from slot 0
mmSPI_SHADER_USER_DATA_PS_0..mmSPI_SHADER_USER_DATA_PS_15:
PDWORD(@GPU_REGS.SPI.PS.USER_DATA)[reg-mmSPI_SHADER_USER_DATA_PS_0]:=value;
//vertex shader
mmSPI_SHADER_PGM_LO_VS :GPU_REGS.SPI.VS.LO:=value;
mmSPI_SHADER_PGM_HI_VS :GPU_REGS.SPI.VS.HI:=value;
mmSPI_SHADER_PGM_RSRC1_VS:DWORD(GPU_REGS.SPI.VS.RSRC1):=value;
mmSPI_SHADER_PGM_RSRC2_VS:DWORD(GPU_REGS.SPI.VS.RSRC2):=value;
mmSPI_SHADER_PGM_RSRC3_VS:DWORD(GPU_REGS.SPI.VS.RSRC3):=value;
mmSPI_SHADER_USER_DATA_VS_0..mmSPI_SHADER_USER_DATA_VS_15:
PDWORD(@GPU_REGS.SPI.VS.USER_DATA)[reg-mmSPI_SHADER_USER_DATA_VS_0]:=value;
mmSPI_SHADER_LATE_ALLOC_VS:DWORD(GPU_REGS.SPI.VS.LATE_ALLOC):=value;
//GS/ES/HS/LS stage registers are not stored (handlers disabled below)
//mmSPI_SHADER_PGM_RSRC3_GS:value:=value;
//mmSPI_SHADER_PGM_RSRC3_ES:value:=value;
//mmSPI_SHADER_PGM_RSRC3_HS:value:=value;
//mmSPI_SHADER_PGM_RSRC3_LS:value:=value;
//compute shader
mmCOMPUTE_PGM_LO :GPU_REGS.SPI.CS.LO:=value;
mmCOMPUTE_PGM_HI :GPU_REGS.SPI.CS.HI:=value;
mmCOMPUTE_PGM_RSRC1 :DWORD(GPU_REGS.SPI.CS.RSRC1):=value;
mmCOMPUTE_PGM_RSRC2 :DWORD(GPU_REGS.SPI.CS.RSRC2):=value;
mmCOMPUTE_NUM_THREAD_X :DWORD(GPU_REGS.SPI.CS.NUM_THREAD_X):=value;
mmCOMPUTE_NUM_THREAD_Y :DWORD(GPU_REGS.SPI.CS.NUM_THREAD_Y):=value;
mmCOMPUTE_NUM_THREAD_Z :DWORD(GPU_REGS.SPI.CS.NUM_THREAD_Z):=value;
mmCOMPUTE_USER_DATA_0..mmCOMPUTE_USER_DATA_15:
PDWORD(@GPU_REGS.SPI.CS.USER_DATA)[reg-mmCOMPUTE_USER_DATA_0]:=value;
mmCOMPUTE_STATIC_THREAD_MGMT_SE0:DWORD(GPU_REGS.SPI.CS.STATIC_THREAD_MGMT_SE0):=value;
mmCOMPUTE_STATIC_THREAD_MGMT_SE1:DWORD(GPU_REGS.SPI.CS.STATIC_THREAD_MGMT_SE1):=value;
mmCOMPUTE_RESOURCE_LIMITS :DWORD(GPU_REGS.SPI.CS.RESOURCE_LIMITS):=value;
//every other register is dropped; it is only traced when "ww" is defined
{$ifdef ww}else
Writeln('onSetShReg:',getRegName(reg),'=',HexStr(value,8));{$endif}
end;
end;
//Stores one user-config register write into GPU_REGS.
// reg   - absolute register index (compared against the mm* constants)
// value - raw 32-bit value to store
//The index is always recorded in GFXRing.LastSetReg. Only the four registers
//listed below are stored; anything else is ignored (traced under "ww").
procedure SetUContextReg(reg:WORD;value:DWORD);
begin
 GFXRing.LastSetReg:=reg;

 Case reg of
  mmVGT_PRIMITIVE_TYPE:
   DWORD(GPU_REGS.VGT_PRIMITIVE_TYPE):=value;

  mmVGT_INDEX_TYPE:
   DWORD(GPU_REGS.VGT_INDEX_TYPE):=value;

  mmVGT_NUM_INSTANCES:
   DWORD(GPU_REGS.VGT_NUM_INSTANCES):=value;

  mmGRBM_GFX_INDEX:
   DWORD(GPU_REGS.GRBM_GFX_INDEX):=value;

  {$ifdef ww}
  else
   Writeln('SetUContextReg:',getRegName(reg),'=',HexStr(value,8));
  {$endif}
 end;
end;
const
 //Context register window used to turn a packet offset into a register index
 CONTEXT_REG_BASE = $A000;
 CONTEXT_REG_END  = $A400;
 CONTEXT_REG_SIZE = CONTEXT_REG_END - CONTEXT_REG_BASE;

//Handler for a "set context register(s)" packet.
//pm4Hdr.count consecutive values are read from Body^.REG_DATA; the i-th value
//goes to register CONTEXT_REG_BASE+Body^.REG_OFFSET+i through SetContextReg.
//GFXRing.SetCxCount is incremented once per written register, so that
//consumers comparing the counter can detect that context state has changed.
//A count of zero is a no-op.
procedure onSetContextReg(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDSETDATA);
var
 idx,cnt,regId:WORD;
 regVal:DWORD;
 src:PDWORD;
begin
 cnt:=pm4Hdr.count;
 if (cnt=0) then Exit;

 src:=PDWORD(@Body^.REG_DATA);

 For idx:=0 to cnt-1 do
 begin
  regId :=CONTEXT_REG_BASE+Body^.REG_OFFSET+idx;
  regVal:=src[idx];

  //trace aid: {$ifdef ww}Writeln('SetContextReg:',getRegName(regId),'=',HexStr(regVal,8));{$endif}

  Inc(GFXRing.SetCxCount);
  SetContextReg(regId,regVal);
 end;
end;
const
 //Shader (SH) register window used to turn a packet offset into a register index
 SH_REG_BASE = $2C00;
 SH_REG_END  = $3000;
 SH_REG_SIZE = SH_REG_END - SH_REG_BASE;

//Handler for a "set shader register(s)" packet.
//pm4Hdr.count consecutive values are read from Body^.REG_DATA; the i-th value
//goes to register SH_REG_BASE+Body^.REG_OFFSET+i through SetShReg.
//GFXRing.SetShCount is incremented once per written register, so that
//consumers comparing the counter can detect that shader state has changed.
//A count of zero is a no-op.
procedure onSetShReg(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDSETDATA);
var
 idx,cnt,regId:WORD;
 regVal:DWORD;
 src:PDWORD;
begin
 cnt:=pm4Hdr.count;
 if (cnt=0) then Exit;

 src:=PDWORD(@Body^.REG_DATA);

 For idx:=0 to cnt-1 do
 begin
  regId :=SH_REG_BASE+Body^.REG_OFFSET+idx;
  regVal:=src[idx];

  //trace aid: {$ifdef ww}Writeln('SetShReg:',getRegName(regId),'=',HexStr(regVal,8));{$endif}

  Inc(GFXRing.SetShCount);
  SetShReg(regId,regVal);
 end;
end;
type
 PVGT_PRIMITIVE_TYPE=^TVGT_PRIMITIVE_TYPE;
 PGRBM_GFX_INDEX=^TGRBM_GFX_INDEX;

Const
 //User-config register window used to turn a packet offset into a register index
 USERCONFIG_REG_BASE = $0C000;
 USERCONFIG_REG_END  = $10000;
 USERCONFIG_REG_SIZE = USERCONFIG_REG_END - USERCONFIG_REG_BASE;

//Handler for a "set user-config register(s)" packet.
//pm4Hdr.count consecutive values are read from Body^.REG_DATA; the i-th value
//goes to register USERCONFIG_REG_BASE+Body^.REG_OFFSET+i through SetUContextReg.
//Register indices noted by the original author:
// mmVGT_PRIMITIVE_TYPE__CI__VI = 0xC242
// mmVGT_INDEX_TYPE__CI__VI     = 0xC243
// mmVGT_NUM_INSTANCES__CI__VI  = 0xC24D
//NOTE(review): unlike onSetContextReg/onSetShReg this handler does not bump
//GFXRing.SetCxCount/SetShCount - confirm that code relying on those counters
//does not need to observe e.g. a primitive type change.
procedure onSetUConfigReg(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDSETDATA);
var
 idx,cnt,regId:WORD;
 regVal:DWORD;
 src:PDWORD;
begin
 cnt:=pm4Hdr.count;
 if (cnt=0) then Exit;

 src:=PDWORD(@Body^.REG_DATA);

 For idx:=0 to cnt-1 do
 begin
  regId :=USERCONFIG_REG_BASE+Body^.REG_OFFSET+idx;
  regVal:=src[idx];

  //trace aid: {$ifdef ww}Writeln('SetUConfigReg:',getRegName(regId),'=',HexStr(regVal,8));{$endif}

  SetUContextReg(regId,regVal);
 end;
end;
const
 //First register index of the config register window
 CONFIG_SPACE_START=$2000;

//Handler for a "set config register(s)" packet.
//pm4Hdr.count consecutive values are read from Body^.REG_DATA; the i-th value
//is passed to SetContextReg as register CONFIG_SPACE_START+Body^.REG_OFFSET+i.
//No change counter is touched here. A count of zero is a no-op.
procedure onSetConfigReg(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDSETDATA);
var
 idx,cnt,regId:WORD;
 regVal:DWORD;
 src:PDWORD;
begin
 cnt:=pm4Hdr.count;
 if (cnt=0) then Exit;

 src:=PDWORD(@Body^.REG_DATA);

 For idx:=0 to cnt-1 do
 begin
  regId :=CONFIG_SPACE_START+Body^.REG_OFFSET+idx;
  regVal:=src[idx];
  SetContextReg(regId,regVal);
 end;
end;
//Handler for the index-buffer-size packet.
//Copies Body^.numIndices into both GPU_REGS.VGT_DMA.INDICES and
//GPU_REGS.VGT_DMA.SIZE. pm4Hdr is not read.
procedure onIndexBufferSize(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDDRAWINDEXBUFFERSIZE);
begin
 GPU_REGS.VGT_DMA.INDICES:=Body^.numIndices;
 GPU_REGS.VGT_DMA.SIZE   :=Body^.numIndices;
end;
type
 PVGT_DMA_INDEX_TYPE=^TVGT_DMA_INDEX_TYPE;

//Handler for the index-type packet.
//Stores the whole packet body in GPU_REGS.VGT_DMA.INDEX_TYPE. pm4Hdr is not read.
//With the "ww" define the selected index width is printed on its own line.
procedure onIndexType(pm4Hdr:PM4_TYPE_3_HEADER;Body:PVGT_DMA_INDEX_TYPE);
begin
 GPU_REGS.VGT_DMA.INDEX_TYPE:=Body^;

 {$ifdef ww}
 Case Body^.INDEX_TYPE of
  VGT_INDEX_16:Writeln('VGT_INDEX_16');
  VGT_INDEX_32:Writeln('VGT_INDEX_32');
  VGT_INDEX_8 :Writeln('VGT_INDEX_8');
  else
   Writeln('VGT_INDEX_UNKNOW');
 end;
 {$endif}
end;
//SLICE.TILE_MAX number of tiles in a slice (equal to Pitch * Height / 64),
//PITCH.TILE_MAX = 159, //(PITCH.TILE_MAX+1)*8=1280
//SLICE.TILE_MAX = 15359, //(SLICE.TILE_MAX+1)/(PITCH.TILE_MAX+1)*8=768
type
 //TvEvent descendant that can destroy itself from a notification callback
 TvEvent2=class(TvEvent)
  Procedure Release(Sender:TObject);
 end;

//Callback with a (Sender:TObject) signature: frees this event object.
//Sender is not used.
Procedure TvEvent2.Release(Sender:TObject);
begin
 Self.Free;
end;
//VK_IMAGE_USAGE_STORAGE_BIT

const
 //Usage flags requested for every image: transfer source/destination + sampled
 VK_IMAGE_USAGE_DEFAULT=
  ord(VK_IMAGE_USAGE_TRANSFER_SRC_BIT) or
  ord(VK_IMAGE_USAGE_TRANSFER_DST_BIT) or
  ord(VK_IMAGE_USAGE_SAMPLED_BIT);

 //Default usage plus depth/stencil attachment
 VK_IMAGE_USAGE_DEFAULT_DEPTH=
  VK_IMAGE_USAGE_DEFAULT or
  ord(VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);

//Returns the usage flags to request for a color image of the given format.
//Formats inside the ranges listed below get VK_IMAGE_USAGE_DEFAULT only;
//every other format additionally gets VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT.
function VK_IMAGE_USAGE_DEFAULT_COLOR(cformat:TVkFormat):TVkFlags;
begin
 Result:=VK_IMAGE_USAGE_DEFAULT;

 Case cformat of
  VK_FORMAT_R4G4_UNORM_PACK8..
  VK_FORMAT_A1R5G5B5_UNORM_PACK16,

  VK_FORMAT_A8B8G8R8_UNORM_PACK32..
  VK_FORMAT_A2B10G10R10_SINT_PACK32,

  VK_FORMAT_B10G11R11_UFLOAT_PACK32..
  VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,

  VK_FORMAT_D16_UNORM..
  VK_FORMAT_BC7_SRGB_BLOCK:
   ; //keep the default usage as is

  else
   Result:=Result or ord(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
 end;
end;
//Clears render target 0 to its CLEAR_COLOR with a transfer clear command.
//Asserts that the target mask register equals $F.
//Steps: end the current render pass, fetch the image (TM_CLEAR usage),
//move it to TRANSFER_DST_OPTIMAL and record ClearColorImage over the
//subresource range reported by the image.
procedure ClearRenderTarget;
var
 rtInfo:TRT_INFO;
 target:TvImage2;
 subres:TVkImageSubresourceRange;
begin
 Assert(DWORD(GPU_REGS.TARGET_MASK)=$F);

 rtInfo:=GPU_REGS.GET_RT_INFO(0);

 GFXRing.AllocCmdBuffer;
 GFXRing.CmdBuffer.EndRenderPass;

 target:=FetchImage(
          GFXRing.CmdBuffer,
          rtInfo.FImageInfo,
          VK_IMAGE_USAGE_DEFAULT_COLOR(rtInfo.FImageInfo.cformat),
          TM_CLEAR);

 target.PushBarrier(
  GFXRing.CmdBuffer,
  ord(VK_ACCESS_TRANSFER_WRITE_BIT),
  VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
  ord(VK_PIPELINE_STAGE_TRANSFER_BIT));

 subres:=target.GetSubresRange;

 GFXRing.CmdBuffer.ClearColorImage(
  target.FHandle,
  VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
  @rtInfo.CLEAR_COLOR,
  1,@subres);
end;
//Resolves render target 0 (source) into render target 1 (destination).
//Asserts that the target mask register equals $F.
//The resolved region is the rectangle returned by GPU_REGS.GET_SCISSOR(0),
//applied at the same offset in both images, with a depth of 1.
//Steps: end the current render pass, fetch both images, move the source to
//TRANSFER_SRC_OPTIMAL and the destination to TRANSFER_DST_OPTIMAL, then
//record ResolveImage.
procedure ResolveRenderTarget;
var
 srcInfo,dstInfo:TRT_INFO;
 srcImg,dstImg:TvImage2;
 area:TVkRect2D;
 region:TVkImageResolve;
begin
 Assert(DWORD(GPU_REGS.TARGET_MASK)=$F);

 srcInfo:=GPU_REGS.GET_RT_INFO(0);
 dstInfo:=GPU_REGS.GET_RT_INFO(1);
 area   :=GPU_REGS.GET_SCISSOR(0);

 GFXRing.AllocCmdBuffer;
 GFXRing.CmdBuffer.EndRenderPass;

 //the source is fetched with a usage mask of 0 (TM_READ is deliberately off)
 srcImg:=FetchImage(
          GFXRing.CmdBuffer,
          srcInfo.FImageInfo,
          VK_IMAGE_USAGE_DEFAULT_COLOR(srcInfo.FImageInfo.cformat),
          {TM_READ}0);

 dstImg:=FetchImage(
          GFXRing.CmdBuffer,
          dstInfo.FImageInfo,
          VK_IMAGE_USAGE_DEFAULT_COLOR(dstInfo.FImageInfo.cformat),
          TM_WRITE);

 srcImg.PushBarrier(
  GFXRing.CmdBuffer,
  ord(VK_ACCESS_TRANSFER_READ_BIT),
  VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
  ord(VK_PIPELINE_STAGE_TRANSFER_BIT));

 dstImg.PushBarrier(
  GFXRing.CmdBuffer,
  ord(VK_ACCESS_TRANSFER_WRITE_BIT),
  VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
  ord(VK_PIPELINE_STAGE_TRANSFER_BIT));

 region:=Default(TVkImageResolve);

 region.srcSubresource:=srcImg.GetSubresLayer;
 region.dstSubresource:=dstImg.GetSubresLayer;

 region.srcOffset.Create(area.offset.x,area.offset.y,0);
 region.dstOffset:=region.srcOffset;

 region.extent.Create(area.extent.width,area.extent.height,1);

 GFXRing.CmdBuffer.ResolveImage(
  srcImg.FHandle,
  VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
  dstImg.FHandle,
  VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
  1,@region);
end;
//Clears a depth/stencil image with a transfer clear command.
// ri  - image to clear
// clr - depth/stencil value written to the image
//The image is moved to TRANSFER_DST_OPTIMAL first; the clear covers the
//subresource range reported by the image. The command is recorded into
//GFXRing.CmdBuffer, which the caller must already have allocated.
procedure ClearDepthTarget(ri:TvImage2;clr:TVkClearDepthStencilValue);
var
 subres:TVkImageSubresourceRange;
begin
 ri.PushBarrier(
  GFXRing.CmdBuffer,
  ord(VK_ACCESS_TRANSFER_WRITE_BIT),
  VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
  ord(VK_PIPELINE_STAGE_TRANSFER_BIT));

 subres:=ri.GetSubresRange;

 GFXRing.CmdBuffer.ClearDepthStencilImage(
  ri.FHandle,
  VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
  @clr,
  1,@subres);
end;
//State remembered between calls of UpdateGpuRegsInfo/UpdateGpuRegsInfoCompute
var
//snapshot of GFXRing.SetShCount taken at the last state rebuild
LastSetShCount:ptruint;
//snapshot of GFXRing.SetCxCount taken at the last state rebuild
LastSetCxCount:ptruint;
//render targets object created by the last graphics state rebuild
LastRenderCmd:TvRenderTargets;
//Reports whether any image view attached to framebuffer f has ri as its parent.
// f  - framebuffer to search (nil gives False)
// ri - image to look for (nil gives False)
//Returns True on the first view whose Parent is ri, otherwise False.
function FindImageInFrameBuffer(f:TvFramebuffer;ri:TvImage2):Boolean;
var
 idx:ptruint;
begin
 Result:=False;

 if (f=nil) then Exit;
 if (ri=nil) then Exit;
 if (f.FImagesCount=0) then Exit;

 For idx:=0 to f.FImagesCount-1 do
  if (TvImageView2(f.FImages[idx]).Parent=ri) then
  begin
   Result:=True;
   Break;
  end;
end;
var
 //Outcome of the most recent full evaluation done by UpdateGpuRegsInfo.
 //It is handed back unchanged while the register state stays the same.
 //Starts as True so that a call made before any register write behaves as before.
 LastGfxUpdateResult:Boolean=True;

//Translates the shadowed graphics register state (GPU_REGS) into Vulkan
//objects and records the required setup into GFXRing.CmdBuffer:
//render pass, framebuffer, pipeline, barriers, push constants, vertex
//buffers and descriptor sets.
//
//Returns True when the caller may record a draw, False when the draw has to
//be skipped (color buffer mode other than CB_NORMAL, nothing enabled,
//depth-only clear, missing vertex shader, ...).
//
//Caching: when neither GFXRing.SetShCount nor GFXRing.SetCxCount changed
//since the previous call, nothing is rebuilt and the previous outcome is
//returned. Previously this path always returned True, so a repeated draw
//after e.g. a fast clear, a resolve or a depth-only clear was recorded
//although no render pass/pipeline had been set up for it.
//
//NOTE(review): objects created for FRenderCmd are not released on the
//Exit(False) paths inside the rebuild - ownership is not visible here.
function UpdateGpuRegsInfo:Boolean;
var
 FAttrBuilder:TvAttrBuilder;
 FUniformBuilder:TvUniformBuilder;

 i,o,a:DWORD;
 pData:Pointer;

 FRenderCmd:TvRenderTargets;

 RT_INFO:TRT_INFO;
 DB_INFO:TDB_INFO;

 ri:TvImage2;
 iv:TvImageView2;
 sm:TvSampler;

 fdump_ps,fdump_vs:RawByteString;

 buf:TvHostBuffer;

 FDescriptorGroup:TvDescriptorGroup;

 FVSShader:TvShaderExt;
 FPSShader:TvShaderExt;

 FShadersKey:TvShadersKey;
 FShaderGroup:TvShaderGroup;

 ctx_change:Boolean;

 Event:TvEvent2; //only referenced by the disabled event barrier further down

 pa:TPushConstAllocator;
begin
 Result:=True;

 {$ifdef null_rt}Exit;{$endif}

 if (LastSetShCount=GFXRing.SetShCount) and
    (LastSetCxCount=GFXRing.SetCxCount) then
 begin
  //same register state as last time: repeat the last verdict
  Exit(LastGfxUpdateResult);
 end;

 //The context is currently always treated as changed; the computed value
 //is deliberately overridden, which makes the "else" branch below inactive.
 ctx_change:=(LastSetCxCount<>GFXRing.SetCxCount);
 ctx_change:=True;

 LastSetShCount:=GFXRing.SetShCount;
 LastSetCxCount:=GFXRing.SetCxCount;

 //pessimistic until the whole setup below has been recorded
 LastGfxUpdateResult:=False;

 fdump_ps:=DumpPS(GPU_REGS);
 fdump_vs:=DumpVS(GPU_REGS);

 {$ifdef ww}Writeln(fdump_vs);{$endif}
 {$ifdef ww}Writeln(fdump_ps);{$endif}

 Case GPU_REGS.CB_COLOR_CONTROL.MODE of
  CB_DISABLE:
   begin
    {$ifdef ww}Writeln('CB_DISABLE');{$endif}
    Exit(False);
   end;
  CB_NORMAL:; //continue
  CB_ELIMINATE_FAST_CLEAR:
   begin
    ClearRenderTarget;
    Exit(False);
   end;
  CB_RESOLVE:
   begin
    ResolveRenderTarget;
    Exit(False);
   end;
  CB_DECOMPRESS:
   begin
    {$ifdef ww}Writeln('CB_DECOMPRESS');{$endif}
    Exit(False);
   end;
  CB_FMASK_DECOMPRESS:
   begin
    {$ifdef ww}Writeln('CB_FMASK_DECOMPRESS');{$endif}
    Exit(False);
   end;
  CB_DCC_DECOMPRESS:
   begin
    {$ifdef ww}Writeln('CB_DCC_DECOMPRESS');{$endif}
    Exit(False);
   end;
  else
   Assert(false,'CB_COLOR_CONTROL.MODE:'+IntToStr(GPU_REGS.CB_COLOR_CONTROL.MODE));
 end;

 //neither color nor depth output is enabled: nothing to render
 if not (GPU_REGS.COMP_ENABLE or GPU_REGS.DB_ENABLE) then Exit(False);

 GFXRing.AllocCmdBuffer;

 ///////////////////

 if ctx_change then
 begin
  FRenderCmd:=TvRenderTargets.Create;
  LastRenderCmd:=FRenderCmd;

  FRenderCmd.FRenderPass:=TvRenderPass.Create;
  FRenderCmd.FPipeline  :=TvGraphicsPipeline.Create;
  FRenderCmd.FPipeline.FRenderPass:=FRenderCmd.FRenderPass;

  FRenderCmd.FFramebuffer:=TvFramebuffer.Create;
  FRenderCmd.FFramebuffer.SetRenderPass(FRenderCmd.FRenderPass);

  //////////////////

  FRenderCmd.FFramebuffer.FreeImageViews;
  FRenderCmd.FRenderPass.Clear;
  FRenderCmd.FPipeline.Clear;

  FRenderCmd.FFramebuffer.SetSize(GPU_REGS.GET_SCREEN_SIZE);

  FRenderCmd.FPipeline.SetPrimType (GPU_REGS.GET_PRIM_TYPE);
  FRenderCmd.FPipeline.SetPrimReset(GPU_REGS.VGT_MULTI_PRIM.IB_RESET_EN.RESET_EN);
  FRenderCmd.FPipeline.SetBlendColors(@GPU_REGS.CB_BLEND_RGBA);

  //multisampling is not wired up; every attachment is forced to 1 sample below
  //FRenderCmd.FPipeline.multisampling.sampleShadingEnable  :=VK_FALSE;
  //FRenderCmd.FPipeline.multisampling.rasterizationSamples :=TVkSampleCountFlagBits(1 shl GPU_REGS.SC_AA_CONFIG.MSAA_NUM_SAMPLES);
  //FRenderCmd.FPipeline.multisampling.minSampleShading     :=1;
  //FRenderCmd.FPipeline.multisampling.pSampleMask          :=nil;
  //FRenderCmd.FPipeline.multisampling.alphaToCoverageEnable:=VK_FALSE;
  //FRenderCmd.FPipeline.multisampling.alphaToOneEnable     :=VK_FALSE;

  FRenderCmd.FRenderArea:=GPU_REGS.GET_SCREEN;

  //viewports + scissors
  For i:=0 to 15 do
   if GPU_REGS.VP_ENABLE(i) then
   begin
    FRenderCmd.FPipeline.AddVPort(GPU_REGS.GET_VPORT(i),GPU_REGS.GET_SCISSOR(i));
   end;

  GFXRing.CmdBuffer.EndRenderPass;

  //color attachments
  if GPU_REGS.COMP_ENABLE then
  For i:=0 to 7 do
   if GPU_REGS.RT_ENABLE(i) then
   begin
    RT_INFO:=GPU_REGS.GET_RT_INFO(i);

    {$ifdef ww}
    Writeln('RT:',i,' ',HexStr(RT_INFO.FImageInfo.Addr));
    Writeln(' TM_READ :',RT_INFO.IMAGE_USAGE and TM_READ <>0);
    Writeln(' TM_WRITE:',RT_INFO.IMAGE_USAGE and TM_WRITE<>0);
    Writeln(' TM_CLEAR:',RT_INFO.IMAGE_USAGE and TM_CLEAR<>0);
    Writeln(' format:',RT_INFO.FImageInfo.cformat);
    Writeln(' size:',RT_INFO.FImageInfo.params.extend.width,'x',RT_INFO.FImageInfo.params.extend.height);
    Writeln(' samples:',RT_INFO.FImageInfo.params.samples);
    {$endif}

    //force single sampling (see the multisampling note above)
    RT_INFO.FImageInfo.params.samples:=1;

    ri:=FetchImage(GFXRing.CmdBuffer,
                   RT_INFO.FImageInfo,
                   VK_IMAGE_USAGE_DEFAULT_COLOR(RT_INFO.FImageInfo.cformat),
                   RT_INFO.IMAGE_USAGE
                   );

    iv:=ri.FetchView(GFXRing.CmdBuffer,RT_INFO.FImageView);

    ri.PushBarrier(GFXRing.CmdBuffer,
                   ord(VK_ACCESS_TRANSFER_READ_BIT),
                   VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                   ord(VK_PIPELINE_STAGE_TRANSFER_BIT));

    ri.PushBarrier(GFXRing.CmdBuffer,
                   GetColorAccessMask(RT_INFO.IMAGE_USAGE),
                   {VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}VK_IMAGE_LAYOUT_GENERAL,
                   ord(VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) or
                   ord(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT) );

    FRenderCmd.FFramebuffer.AddImageView(iv);

    FRenderCmd.FRenderPass.AddColorRef(FRenderCmd.FRenderPass.subpass.colorAttachmentCount,RT_INFO.IMAGE_USAGE);

    FRenderCmd.FRenderPass.AddColorAt(RT_INFO.FImageInfo.cformat,
                                      RT_INFO.IMAGE_USAGE,
                                      TVkSampleCountFlagBits(RT_INFO.FImageInfo.params.samples));

    FRenderCmd.FPipeline.AddBlend(RT_INFO.blend);

    FRenderCmd.AddClearColor(TVkClearValue(RT_INFO.CLEAR_COLOR));
   end;

  if DEPTH_DISABLE_HACK and (not GPU_REGS.COMP_ENABLE) then Exit(false);

  //depth/stencil attachment
  if (not DEPTH_DISABLE_HACK) and GPU_REGS.DB_ENABLE {false} then
  begin
   DB_INFO:=GPU_REGS.GET_DB_INFO;

   {$ifdef ww}
   Writeln('DB');
   Writeln(' TM_READ :',DB_INFO.DEPTH_USAGE and TM_READ <>0);
   Writeln(' TM_WRITE:',DB_INFO.DEPTH_USAGE and TM_WRITE<>0);
   Writeln(' TM_CLEAR:',DB_INFO.DEPTH_USAGE and TM_CLEAR<>0);
   Writeln(' format:',DB_INFO.FImageInfo.cformat);
   Writeln(' size:',DB_INFO.FImageInfo.params.extend.width,'x',DB_INFO.FImageInfo.params.extend.height);
   Writeln(' samples:',DB_INFO.FImageInfo.params.samples);
   {$endif}

   DB_INFO.FImageInfo.params.samples:=1;

   ri:=FetchImage(GFXRing.CmdBuffer,
                  DB_INFO.FImageInfo,
                  VK_IMAGE_USAGE_DEFAULT_DEPTH,
                  DB_INFO.DEPTH_USAGE
                  );

   iv:=ri.FetchView(GFXRing.CmdBuffer);

   ri.PushBarrier(GFXRing.CmdBuffer,
                  ord(VK_ACCESS_TRANSFER_READ_BIT),
                  VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                  ord(VK_PIPELINE_STAGE_TRANSFER_BIT));

   //depth enabled without any color output: handled as a depth clear
   if not GPU_REGS.COMP_ENABLE then
   begin
    ClearDepthTarget(ri,DB_INFO.CLEAR_VALUE.depthStencil);
    Exit(False);
   end;

   ri.PushBarrier(GFXRing.CmdBuffer,
                  GetDepthStencilAccessMask(DB_INFO.DEPTH_USAGE,DB_INFO.STENCIL_USAGE),
                  GetDepthStencilLayout    (DB_INFO.DEPTH_USAGE,DB_INFO.STENCIL_USAGE),
                  DB_INFO.zorder_stage );

   FRenderCmd.FFramebuffer.AddImageView(iv);

   //the depth attachment index follows the color attachments
   FRenderCmd.FRenderPass.SetDepthStencilRef(
    FRenderCmd.FRenderPass.subpass.colorAttachmentCount,
    DB_INFO.DEPTH_USAGE,DB_INFO.STENCIL_USAGE);

   FRenderCmd.FRenderPass.AddDepthAt(
    DB_INFO.FImageInfo.cformat,
    DB_INFO.DEPTH_USAGE,
    DB_INFO.STENCIL_USAGE);

   FRenderCmd.FRenderPass.SetZorderStage(DB_INFO.zorder_stage);

   FRenderCmd.AddClearColor(DB_INFO.CLEAR_VALUE);

   FRenderCmd.FPipeline.DepthStencil.depthTestEnable      :=DB_INFO.depthTestEnable      ;
   FRenderCmd.FPipeline.DepthStencil.depthWriteEnable     :=DB_INFO.depthWriteEnable     ;
   FRenderCmd.FPipeline.DepthStencil.depthCompareOp       :=DB_INFO.depthCompareOp       ;
   FRenderCmd.FPipeline.DepthStencil.depthBoundsTestEnable:=DB_INFO.depthBoundsTestEnable;
   FRenderCmd.FPipeline.DepthStencil.stencilTestEnable    :=DB_INFO.stencilTestEnable    ;
   FRenderCmd.FPipeline.DepthStencil.front                :=DB_INFO.front                ;
   FRenderCmd.FPipeline.DepthStencil.back                 :=DB_INFO.back                 ;
   FRenderCmd.FPipeline.DepthStencil.minDepthBounds       :=DB_INFO.minDepthBounds       ;
   FRenderCmd.FPipeline.DepthStencil.maxDepthBounds       :=DB_INFO.maxDepthBounds       ;
  end;

 end else //ctx_change
 begin
  FRenderCmd:=LastRenderCmd;
 end;

 pa.Init;

 //pixel shader is optional, vertex shader is mandatory
 {$ifdef ww}Writeln('[FPSShader]');{$endif}
 FPSShader:=FetchShader(vShaderStagePs,1,GPU_REGS,@pa);

 {$ifdef ww}Writeln('[FVSShader]');{$endif}
 FVSShader:=FetchShader(vShaderStageVs,0,GPU_REGS,@pa);
 if (FVSShader=nil) then Exit(False);

 //vertex input layout
 FAttrBuilder:=Default(TvAttrBuilder);
 FVSShader.EnumVertLayout(@FAttrBuilder.AddAttr,FVSShader.FDescSetId,@GPU_REGS.SPI.VS.USER_DATA);

 if (Length(FAttrBuilder.FBindDescs)<>0) then
 begin
  With FRenderCmd.FPipeline.vertexInputInfo do
  begin
   vertexBindingDescriptionCount  :=Length(FAttrBuilder.FBindDescs);
   pVertexBindingDescriptions     :=@FAttrBuilder.FBindDescs[0];
   vertexAttributeDescriptionCount:=Length(FAttrBuilder.FAttrDescs);
   pVertexAttributeDescriptions   :=@FAttrBuilder.FAttrDescs[0];
  end;
 end;

 //shader group + descriptor sets
 FShadersKey:=Default(TvShadersKey);
 FShadersKey.SetVSShader(FVSShader);
 FShadersKey.SetPSShader(FPSShader);

 FShaderGroup:=FetchShaderGroup(@FShadersKey);
 Assert(FShaderGroup<>nil);

 FRenderCmd.FPipeline.FShaderGroup:=FShaderGroup;

 FDescriptorGroup:=FetchDescriptorGroup(GFXRing.CmdBuffer,FShaderGroup.FLayout);

 FUniformBuilder:=Default(TvUniformBuilder);

 FVSShader.EnumUnifLayout(@FUniformBuilder.AddAttr,FVSShader.FDescSetId,@GPU_REGS.SPI.VS.USER_DATA);

 if (FPSShader<>nil) then
  FPSShader.EnumUnifLayout(@FUniformBuilder.AddAttr,FPSShader.FDescSetId,@GPU_REGS.SPI.PS.USER_DATA);

 //barriers for sampled images are recorded before the render pass begins
 if (Length(FUniformBuilder.FImages)<>0) then
 begin
  For i:=0 to High(FUniformBuilder.FImages) do
   With FUniformBuilder.FImages[i] do
   begin
    ri:=FetchImage(GFXRing.CmdBuffer,
                   FImage,
                   VK_IMAGE_USAGE_DEFAULT_COLOR(FImage.cformat),
                   TM_READ
                   );

    iv:=ri.FetchView(GFXRing.CmdBuffer,FView);

    ri.PushBarrier(GFXRing.CmdBuffer,
                   ord(VK_ACCESS_SHADER_READ_BIT),
                   VK_IMAGE_LAYOUT_GENERAL,
                   ord(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT) or
                   ord(VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) );
   end;
 end;

 //disabled: full pipeline barrier through an event
 {
 Event:=TvEvent2.Create;
 GFXRing.CmdBuffer.AddDependence(@Event.Release);
 vkCmdSetEvent(
  GFXRing.CmdBuffer.cmdbuf,
  Event.FHandle,
  ord(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT));
 vkCmdWaitEvents(
  GFXRing.CmdBuffer.cmdbuf,
  1,@Event.FHandle,
  ord(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT),
  ord(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT),
  0,nil,
  0,nil,
  0,nil);
 }

 if ctx_change then
 begin
  if not GFXRing.CmdBuffer.BeginRenderPass(FRenderCmd) then
  begin
   Writeln(StdErr,'!BeginRenderPass');
   Assert(false,'!BeginRenderPass');
  end;
 end;

 //push constants
 if (FPSShader<>nil) then
 if (FPSShader.FPushConst.size<>0) then
 begin
  pData:=FPSShader.GetPushConstData(@GPU_REGS.SPI.PS.USER_DATA);

  if (pData<>nil) then
   GFXRing.CmdBuffer.PushConstant(VK_PIPELINE_BIND_POINT_GRAPHICS,
                                  ord(VK_SHADER_STAGE_FRAGMENT_BIT),
                                  FPSShader.FPushConst.offset,FPSShader.FPushConst.size,
                                  pData);
 end;

 if (FVSShader.FPushConst.size<>0) then
 begin
  pData:=FVSShader.GetPushConstData(@GPU_REGS.SPI.VS.USER_DATA);

  if (pData<>nil) then
   GFXRing.CmdBuffer.PushConstant(VK_PIPELINE_BIND_POINT_GRAPHICS,
                                  ord(VK_SHADER_STAGE_VERTEX_BIT),
                                  FVSShader.FPushConst.offset,FVSShader.FPushConst.size,
                                  pData);
 end;

 //vertex buffers
 if (Length(FAttrBuilder.FBindExt)<>0) then
 begin
  For i:=0 to High(FAttrBuilder.FBindExt) do
   With FAttrBuilder.FBindExt[i] do
   begin
    buf:=FetchHostBuffer(GFXRing.CmdBuffer,min_addr,GetSize,ord(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT));

    GFXRing.CmdBuffer.BindVertexBuffer(
     binding,
     buf.FHandle,
     buf.Foffset);
   end;
 end;

 //sampled images
 if (Length(FUniformBuilder.FImages)<>0) then
 begin
  For i:=0 to High(FUniformBuilder.FImages) do
   With FUniformBuilder.FImages[i] do
   begin
    ri:=FetchImage(GFXRing.CmdBuffer,
                   FImage,
                   VK_IMAGE_USAGE_DEFAULT_COLOR(FImage.cformat),
                   TM_READ
                   );

    iv:=ri.FetchView(GFXRing.CmdBuffer,FView);

    FDescriptorGroup.FSets[fset].BindImg(bind,0,
                                         iv.FHandle,
                                         VK_IMAGE_LAYOUT_GENERAL);
   end;
 end;

 //samplers
 if (Length(FUniformBuilder.FSamplers)<>0) then
 begin
  For i:=0 to High(FUniformBuilder.FSamplers) do
   With FUniformBuilder.FSamplers[i] do
   begin
    sm:=FetchSampler(GFXRing.CmdBuffer,PS);

    FDescriptorGroup.FSets[fset].BindSmp(bind,0,sm.FHandle);
   end;
 end;

 //storage buffers
 if (Length(FUniformBuilder.FBuffers)<>0) then
 begin
  For i:=0 to High(FUniformBuilder.FBuffers) do
   With FUniformBuilder.FBuffers[i] do
   begin
    buf:=FetchHostBuffer(GFXRing.CmdBuffer,addr,size,ord(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));

    o:=buf.Foffset;

    //part of the offset cut off by aligning to minStorageBufferOffsetAlignment;
    //it has to match the offset the shader was generated with
    a:=o-AlignDw(o,limits.minStorageBufferOffsetAlignment);
    if (a<>offset) then Assert(false);

    o:=AlignDw(o,limits.minStorageBufferOffsetAlignment{ $10}); //minStorageBufferOffsetAlignment

    FDescriptorGroup.FSets[fset].BindBuf(bind,0,
                                         VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                         buf.FHandle,
                                         o,
                                         VK_WHOLE_SIZE);
   end;
 end;

 GFXRing.CmdBuffer.BindSets(VK_PIPELINE_BIND_POINT_GRAPHICS,FDescriptorGroup);

 //everything has been recorded: identical follow-up draws may proceed
 LastGfxUpdateResult:=True;
end;
var
 //Change-counter snapshot and outcome private to UpdateGpuRegsInfoCompute.
 //The compute path used to share LastSetShCount/LastSetCxCount with the
 //graphics path, so whichever path ran second after a register change
 //skipped its own setup.
 LastCsSetShCount:ptruint=0;
 LastCsSetCxCount:ptruint=0;
 LastCsUpdateResult:Boolean=True;

//Translates the shadowed compute register state (GPU_REGS.SPI.CS) into a
//Vulkan compute pipeline and records pipeline binding, push constants and
//storage buffer descriptors into GFXRing.CmdBuffer.
//
//Returns True when the caller may record a dispatch, False when it must be
//skipped (COMPUTE_DISABLE_HACK set, "null_rt" build, no compute shader).
//
//Caching: when neither GFXRing.SetShCount nor GFXRing.SetCxCount changed
//since the previous compute setup, nothing is rebuilt and the previous
//outcome is returned (previously always True, even after a failed setup).
//
//Because the setup ends the active render pass, the snapshot used by the
//graphics path is invalidated so that the next draw rebuilds its state.
function UpdateGpuRegsInfoCompute:Boolean;
var
 FUniformBuilder:TvUniformBuilder;

 fdump_cs:RawByteString;

 i,o,a:Integer;
 pData:Pointer;

 buf:TvHostBuffer;

 FDescriptorGroup:TvDescriptorGroup;

 FCSShader:TvShaderExt;

 FShadersKey:TvShadersKey;
 FShaderGroup:TvShaderGroup;

 FComputePipeline:TvComputePipeline2;

 pa:TPushConstAllocator;
begin
 Result:=False;

 if COMPUTE_DISABLE_HACK then Exit;

 {$ifdef null_rt}Exit;{$endif}

 if (LastCsSetShCount=GFXRing.SetShCount) and
    (LastCsSetCxCount=GFXRing.SetCxCount) then
 begin
  //same register state as at the last compute setup: repeat its verdict
  Exit(LastCsUpdateResult);
 end;

 LastCsSetShCount:=GFXRing.SetShCount;
 LastCsSetCxCount:=GFXRing.SetCxCount;

 //pessimistic until the whole setup below has been recorded
 LastCsUpdateResult:=False;

 fdump_cs:=DumpCS(GPU_REGS);

 GFXRing.AllocCmdBuffer;
 GFXRing.CmdBuffer.EndRenderPass;

 //The render pass is gone: make the graphics snapshot differ from the live
 //counters so that UpdateGpuRegsInfo does a full rebuild before the next draw.
 LastSetShCount:=not GFXRing.SetShCount;
 LastSetCxCount:=not GFXRing.SetCxCount;

 pa.Init;

 FCSShader:=FetchShader(vShaderStageCs,0,GPU_REGS,@pa);
 if (FCSShader=nil) then Exit;

 FShadersKey:=Default(TvShadersKey);
 FShadersKey.SetCSShader(FCSShader);

 FShaderGroup:=FetchShaderGroup(@FShadersKey);
 Assert(FShaderGroup<>nil);

 FComputePipeline:=TvComputePipeline2.Create;
 FComputePipeline.SetLayout(FShaderGroup.FLayout);
 FComputePipeline.SetShader(FCSShader);
 FComputePipeline.Compile;

 GFXRing.CmdBuffer.BindCompute(FComputePipeline);

 //push constants
 if (FCSShader.FPushConst.size<>0) then
 begin
  pData:=FCSShader.GetPushConstData(@GPU_REGS.SPI.CS.USER_DATA);

  if (pData<>nil) then
   GFXRing.CmdBuffer.PushConstant(VK_PIPELINE_BIND_POINT_COMPUTE,
                                  ord(VK_SHADER_STAGE_COMPUTE_BIT),
                                  0,FCSShader.FPushConst.size,
                                  pData);
 end;

 FUniformBuilder:=Default(TvUniformBuilder);
 FCSShader.EnumUnifLayout(@FUniformBuilder.AddAttr,FCSShader.FDescSetId,@GPU_REGS.SPI.CS.USER_DATA);

 FDescriptorGroup:=FetchDescriptorGroup(GFXRing.CmdBuffer,FShaderGroup.FLayout);

 //storage buffers
 if (Length(FUniformBuilder.FBuffers)<>0) then
 begin
  For i:=0 to High(FUniformBuilder.FBuffers) do
   With FUniformBuilder.FBuffers[i] do
   begin
    buf:=FetchHostBuffer(GFXRing.CmdBuffer,addr,size,ord(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));

    o:=buf.Foffset;

    //part of the offset cut off by aligning to minStorageBufferOffsetAlignment;
    //it has to match the offset the shader was generated with
    a:=o-AlignDw(o,limits.minStorageBufferOffsetAlignment);
    if (a<>offset) then Assert(false);

    o:=AlignDw(o,limits.minStorageBufferOffsetAlignment { $10}); //minStorageBufferOffsetAlignment

    FDescriptorGroup.FSets[fset].BindBuf(bind,0,
                                         VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                         buf.FHandle,
                                         o,
                                         VK_WHOLE_SIZE);
   end;
 end;

 GFXRing.CmdBuffer.BindSets(VK_PIPELINE_BIND_POINT_COMPUTE,FDescriptorGroup);

 //everything has been recorded: identical follow-up dispatches may proceed
 LastCsUpdateResult:=True;
 Result:=True;
end;
//Debug check for the primitive restart index.
// INDEX_TYPE    - index width used by the draw
// RESET_EN      - non-zero when primitive restart is enabled
// IB_RESET_INDX - restart index value programmed by the guest
//When restart is enabled, asserts that the programmed value is the all-ones
//value of the index width ($FF, $FFFF or $FFFFFFFF). Other index types and
//a disabled restart are not checked.
procedure test_reset_index(INDEX_TYPE:TVkIndexType;RESET_EN:Byte;IB_RESET_INDX:DWORD);
var
 expected:DWORD;
begin
 if (RESET_EN=0) then Exit;

 Case INDEX_TYPE of
  VK_INDEX_TYPE_UINT8_EXT:expected:=$000000FF;
  VK_INDEX_TYPE_UINT16   :expected:=$0000FFFF;
  VK_INDEX_TYPE_UINT32   :expected:=$FFFFFFFF;
  else
   Exit;
 end;

 Assert(IB_RESET_INDX=expected,'unsupport reset index:'+HexStr(IB_RESET_INDX,8));
end;
//Handler for the indexed draw packet that carries its own index buffer.
//Latches max size, index base (lo/hi) and index count from the packet into
//GPU_REGS.VGT_DMA, checks the primitive restart index, and - when
//UpdateGpuRegsInfo allows the draw - records an indexed draw with the
//instance count taken from GPU_REGS.VGT_DMA.NUM_INSTANCES.
//pm4Hdr is not read.
procedure onDrawIndex2(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDDRAWINDEX2);
var
 IndexData:Pointer;
begin
 //index buffer location
 GPU_REGS.VGT_DMA.BASE_LO :=Body^.indexBaseLo;
 GPU_REGS.VGT_DMA.BASE_HI :=Body^.indexBaseHi;
 //index buffer extent
 GPU_REGS.VGT_DMA.MAX_SIZE:=Body^.maxSize;
 GPU_REGS.VGT_DMA.SIZE    :=Body^.indexCount;
 GPU_REGS.VGT_DMA.INDICES :=Body^.indexCount;

 //drawInitiator:TVGT_DRAW_INITIATOR;

 test_reset_index(
  GPU_REGS.GET_INDEX_TYPE,
  GPU_REGS.VGT_MULTI_PRIM.IB_RESET_EN.RESET_EN,
  GPU_REGS.VGT_MULTI_PRIM.IB_RESET_INDX);

 if UpdateGpuRegsInfo then
 begin
  GFXRing.AllocCmdBuffer;

  IndexData:=getIndexAddress(GPU_REGS.VGT_DMA.BASE_LO,GPU_REGS.VGT_DMA.BASE_HI);

  GFXRing.CmdBuffer.instanceCount:=GPU_REGS.VGT_DMA.NUM_INSTANCES;
  GFXRing.CmdBuffer.DrawIndex2(IndexData,GPU_REGS.VGT_DMA.INDICES,GPU_REGS.GET_INDEX_TYPE);
 end;

 {$ifdef ww}
 Writeln('DrawIndex:',Body^.indexCount);
 Writeln('VGT_VTX_INDX.CNT_EN :',GPU_REGS.VGT_VTX_INDX.CNT_EN.VTX_CNT_EN);
 Writeln('VGT_VTX_INDX.INDX_OFFSET :',GPU_REGS.VGT_VTX_INDX.INDX_OFFSET);
 Writeln('VGT_MULTI_PRIM.IB_RESET_EN :',GPU_REGS.VGT_MULTI_PRIM.IB_RESET_EN.RESET_EN);
 Writeln('VGT_MULTI_PRIM.IB_RESET_INDX:',HexStr(GPU_REGS.VGT_MULTI_PRIM.IB_RESET_INDX,8));
 {$endif}

 //GFXMicroEngine.PushCmd(GFXRing.CmdBuffer);
end;
//Handler for the IT_DRAW_INDEX_AUTO packet.
// pm4Hdr - packet header (not used by this handler)
// Body   - packet payload, only indexCount is read
//Stores the index count in GPU_REGS.VGT_DMA.INDICES and, when
//UpdateGpuRegsInfo returns True, records the draw into the GFX ring
//command buffer.
procedure onDrawIndexAuto(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDDRAWINDEXAUTO);
begin
 GPU_REGS.VGT_DMA.INDICES:=Body^.indexCount;

 if UpdateGpuRegsInfo then
 begin
  GFXRing.AllocCmdBuffer;
  GFXRing.CmdBuffer.instanceCount:=GPU_REGS.VGT_DMA.NUM_INSTANCES;
  GFXRing.CmdBuffer.DrawIndexAuto(GPU_REGS.VGT_DMA.INDICES);
 end;

 {$ifdef ww}
 Writeln('onDrawIndexAuto:',Body^.indexCount);
 Writeln('VGT_VTX_INDX.CNT_EN:',GPU_REGS.VGT_VTX_INDX.CNT_EN.VTX_CNT_EN);
 {$endif}

 //GFXMicroEngine.PushCmd(GFXRing.CmdBuffer);
end;
//Handler for the IT_INDEX_BASE packet.
// pm4Hdr - packet header (not used by this handler)
// Body   - packet payload holding the two halves of the index base
//Only updates GPU_REGS.VGT_DMA.BASE_LO/BASE_HI, nothing is drawn here.
procedure onIndexBase(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDDRAWINDEXBASE);
begin
 GPU_REGS.VGT_DMA.BASE_HI:=Body^.indexBaseHi;
 GPU_REGS.VGT_DMA.BASE_LO:=Body^.indexBaseLo;
end;
//Handler for the IT_DRAW_INDEX_OFFSET_2 packet.
// pm4Hdr - packet header (not used by this handler)
// Body   - packet payload: maxSize, indexOffset and indexCount are read
//The index base is not part of this packet; the BASE_LO/BASE_HI values
//already stored in GPU_REGS.VGT_DMA are used. When UpdateGpuRegsInfo
//returns True an indexed draw with an offset is recorded into the GFX
//ring command buffer.
procedure onDrawIndexOffset2(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDDRAWINDEXOFFSET2);
var
 IndexPtr:Pointer;
begin
 //mirror the packet fields into the register state
 GPU_REGS.VGT_DMA.MAX_SIZE:=Body^.maxSize;
 GPU_REGS.VGT_DMA.SIZE    :=Body^.indexCount;
 GPU_REGS.VGT_DMA.INDICES :=Body^.indexCount;

 test_reset_index(
  GPU_REGS.GET_INDEX_TYPE,
  GPU_REGS.VGT_MULTI_PRIM.IB_RESET_EN.RESET_EN,
  GPU_REGS.VGT_MULTI_PRIM.IB_RESET_INDX
 );

 if UpdateGpuRegsInfo then
 begin
  GFXRing.AllocCmdBuffer;

  IndexPtr:=getIndexAddress(GPU_REGS.VGT_DMA.BASE_LO,
                            GPU_REGS.VGT_DMA.BASE_HI);

  GFXRing.CmdBuffer.instanceCount:=GPU_REGS.VGT_DMA.NUM_INSTANCES;
  GFXRing.CmdBuffer.DrawIndexOffset2(IndexPtr,
                                     Body^.indexOffset,
                                     GPU_REGS.VGT_DMA.INDICES,
                                     GPU_REGS.GET_INDEX_TYPE);
 end;

 {$ifdef ww}
 Writeln('DrawIndexOffset2:',Body^.indexOffset,' ',Body^.indexCount);
 Writeln('VGT_VTX_INDX.CNT_EN :',GPU_REGS.VGT_VTX_INDX.CNT_EN.VTX_CNT_EN);
 Writeln('VGT_VTX_INDX.INDX_OFFSET :',GPU_REGS.VGT_VTX_INDX.INDX_OFFSET);
 Writeln('VGT_MULTI_PRIM.IB_RESET_EN :',GPU_REGS.VGT_MULTI_PRIM.IB_RESET_EN.RESET_EN);
 Writeln('VGT_MULTI_PRIM.IB_RESET_INDX:',HexStr(GPU_REGS.VGT_MULTI_PRIM.IB_RESET_INDX,8));
 {$endif}
end;
//Handler for the IT_DISPATCH_DIRECT packet.
// pm4Hdr - packet header (not used by this handler)
// Body   - packet payload carrying dimX/dimY/dimZ
//When UpdateGpuRegsInfoCompute returns True the dispatch is recorded into
//the GFX ring command buffer, otherwise the packet is dropped.
procedure onDispatchDirect(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDDISPATCHDIRECT);
begin
 if UpdateGpuRegsInfoCompute then
 begin
  GFXRing.AllocCmdBuffer;
  GFXRing.CmdBuffer.DispatchDirect(Body^.dimX,
                                   Body^.dimY,
                                   Body^.dimZ);
 end;

 {$ifdef ww}
 Writeln('onDispatchDirect:',Body^.dimX,':',Body^.dimY,':',Body^.dimZ);
 {$endif}

 //GFXMicroEngine.PushCmd(GFXRing.CmdBuffer);
end;
type
 //typed view of the IT_NUM_INSTANCES payload
 PVGT_DMA_NUM_INSTANCES=^TVGT_DMA_NUM_INSTANCES;

//Handler for the IT_NUM_INSTANCES packet.
// pm4Hdr - packet header (not used by this handler)
// Body   - points at the instance count value
//Stores the value in GPU_REGS.VGT_DMA.NUM_INSTANCES; the draw handlers
//copy it into the command buffer right before recording a draw.
procedure onNumInstances(pm4Hdr:PM4_TYPE_3_HEADER;Body:PVGT_DMA_NUM_INSTANCES);
begin
 GPU_REGS.VGT_DMA.NUM_INSTANCES:=Body^;

 {$ifdef ww}
 Writeln('onNumInstances:',Body^);
 {$endif}
end;
//Route a single PM4 type-3 packet to the handler of its opcode.
// token - header dword of the packet
// Body  - address of the dword that follows the header
//An unknown opcode is printed and trips an assertion.
//After the handler ran, GFXRing.LastSetReg is cleared unless the opcode is
//IT_SET_CONTEXT_REG, IT_SET_SH_REG or IT_SET_UCONFIG_REG.
procedure gfx_cp_on_type3(token:DWORD;Body:Pointer);
var
 hdr:PM4_TYPE_3_HEADER;
begin
 hdr:=PM4_TYPE_3_HEADER(token);

 case hdr.opcode of

  IT_NOP:
   begin
    onNop(hdr,Body);
   end;

  IT_EVENT_WRITE_EOP:
   begin
    {$ifdef ww}Writeln('IT_EVENT_WRITE_EOP');{$endif}
    onEventWriteEop(hdr,Body);
   end;

  IT_EVENT_WRITE_EOS:
   begin
    {$ifdef ww}Writeln('IT_EVENT_WRITE_EOS');{$endif}
    onEventWriteEos(hdr,Body);
   end;

  IT_DMA_DATA:
   begin
    {$ifdef ww}Writeln('IT_DMA_DATA');{$endif}
    onDMAData(hdr,Body);
   end;

  IT_ACQUIRE_MEM:
   begin
    {$ifdef ww}Writeln('IT_ACQUIRE_MEM');{$endif}
    onAcquireMem(hdr,Body);
   end;

  IT_CONTEXT_CONTROL:
   begin
    {$ifdef ww}Writeln('IT_CONTEXT_CONTROL');{$endif}
    onContextControl(hdr,Body);
   end;

  IT_CLEAR_STATE:
   begin
    {$ifdef ww}Writeln('IT_CLEAR_STATE');{$endif}
    onClearState(hdr,Body);
   end;

  IT_SET_CONTEXT_REG:
   begin
    {$ifdef ww}Writeln('IT_SET_CONTEXT_REG');{$endif}
    onSetContextReg(hdr,Body);
   end;

  IT_SET_SH_REG:
   begin
    {$ifdef ww}Writeln('IT_SET_SH_REG');{$endif}
    onSetShReg(hdr,Body);
   end;

  IT_SET_UCONFIG_REG:
   begin
    {$ifdef ww}Writeln('IT_SET_UCONFIG_REG');{$endif}
    onSetUConfigReg(hdr,Body);
   end;

  IT_SET_CONFIG_REG:
   begin
    {$ifdef ww}Writeln('IT_SET_CONFIG_REG');{$endif}
    onSetConfigReg(hdr,Body);
   end;

  IT_INDEX_BUFFER_SIZE:
   begin
    {$ifdef ww}Writeln('IT_INDEX_BUFFER_SIZE');{$endif}
    onIndexBufferSize(hdr,Body);
   end;

  IT_INDEX_TYPE:
   begin
    {$ifdef ww}Writeln('IT_INDEX_TYPE');{$endif}
    onIndexType(hdr,Body);
   end;

  IT_DRAW_INDEX_2:
   begin
    {$ifdef ww}Writeln('IT_DRAW_INDEX_2');{$endif}
    onDrawIndex2(hdr,Body);
   end;

  IT_DRAW_INDEX_AUTO:
   begin
    {$ifdef ww}Writeln('IT_DRAW_INDEX_AUTO');{$endif}
    onDrawIndexAuto(hdr,Body);
   end;

  IT_INDEX_BASE:
   begin
    {$ifdef ww}Writeln('IT_INDEX_BASE');{$endif}
    onIndexBase(hdr,Body);
   end;

  IT_DRAW_INDEX_OFFSET_2:
   begin
    {$ifdef ww}Writeln('IT_DRAW_INDEX_OFFSET_2');{$endif}
    onDrawIndexOffset2(hdr,Body);
   end;

  IT_DISPATCH_DIRECT:
   begin
    {$ifdef ww}Writeln('IT_DISPATCH_DIRECT');{$endif}
    onDispatchDirect(hdr,Body);
   end;

  IT_NUM_INSTANCES:
   begin
    {$ifdef ww}Writeln('IT_NUM_INSTANCES');{$endif}
    onNumInstances(hdr,Body);
   end;

  IT_WAIT_REG_MEM:
   begin
    {$ifdef ww}Writeln('IT_WAIT_REG_MEM');{$endif}
    onWaitRegMem(hdr,Body);
   end;

  IT_WRITE_DATA:
   begin
    {$ifdef ww}Writeln('IT_WRITE_DATA');{$endif}
    onWriteData(hdr,Body);
   end;

  IT_EVENT_WRITE:
   begin
    {$ifdef ww}Writeln('IT_EVENT_WRITE');{$endif}
    onEventWrite(hdr,Body);
   end;

  IT_PFP_SYNC_ME:
   begin
    {$ifdef ww}Writeln('IT_PFP_SYNC_ME');{$endif}
    onPfpSyncMe(hdr,Body);
   end;

  else
   begin
    Writeln('PM4_TYPE_3.opcode:0x',HexStr(hdr.opcode,2));
    Assert(False);
   end;

 end;

 //every packet except these three register writes clears LastSetReg
 case hdr.opcode of
  IT_SET_CONTEXT_REG,
  IT_SET_SH_REG,
  IT_SET_UCONFIG_REG:;
  else
   GFXRing.LastSetReg:=0;
 end;
end;

//Walk every draw command buffer of a submit and execute its PM4 packets.
// node - submit description; nil is accepted and ignored
//For each of the node^.count entries the buffer dcbGpuAddrs[n] of
//dcbSizesInBytes[n] bytes is decoded packet by packet:
// type 0 -> onPm40, type 2 -> onPm42 (header only, one dword),
// type 3 -> dispatched by opcode, any other type is printed and asserts.
//Constant command buffers are not implemented: when the ccb arrays are
//present their size entry must be zero (asserted).
procedure gfx_cp_parser(node:PvSubmitInfo);
var
 bufIdx:DWORD; //index of the command buffer being parsed
 pos   :DWORD; //byte offset inside the current buffer
 total :DWORD; //size of the current buffer in bytes
 step  :DWORD; //size of the current packet in bytes
 token :DWORD; //header dword of the current packet
 cur   :PByte; //address of the current packet
 body  :Pointer; //address right after the header dword
begin
 if (node=nil) then Exit;

 bufIdx:=0;
 While (bufIdx<node^.count) do
 begin

  if (node^.ccbGpuAddrs<>nil) and (node^.ccbSizesInBytes<>nil) then
  begin
   Assert(node^.ccbSizesInBytes[bufIdx]=0,'TODO CCB');
  end;

  pos  :=0;
  total:=node^.dcbSizesInBytes[bufIdx];
  cur  :=PByte(node^.dcbGpuAddrs[bufIdx]);

  While (pos<total) do
  begin
   token:=PDWORD(cur)^;
   body :=cur+sizeof(DWORD);

   case PM4_TYPE(token) of

    0:begin //PM4_TYPE_0
       onPm40(PM4_TYPE_0_HEADER(token),body);
       step:=PM4_LENGTH_DW(token)*sizeof(DWORD);
      end;

    2:begin //PM4_TYPE_2
       onPm42(PM4_TYPE_2_HEADER(token));
       //no body, the packet is just its header
       step:=sizeof(DWORD);
      end;

    3:begin //PM4_TYPE_3
       gfx_cp_on_type3(token,body);
       step:=PM4_LENGTH_DW(token)*sizeof(DWORD);
      end;

    else
      begin
       Writeln('PM4_TYPE_',PM4_TYPE(token));
       Assert(False);
       step:=PM4_LENGTH_DW(token)*sizeof(DWORD);
      end;

   end;

   //advance to the next packet
   cur:=cur+step;
   pos:=pos+step;
  end;

  Inc(bufIdx);
 end;
end;
//Finish recording of a command buffer and hand it to the queue.
// CmdBuffer - buffer to submit, nil is accepted
//Returns the result of CmdBuffer.QueueSubmit, or False when there is no
//buffer or the buffer holds no commands (cmd_count=0).
//The render pass is ended and the fence is reset even when the buffer
//turns out to be empty.
function gfx_submit(CmdBuffer:TvCmdBuffer):Boolean;
begin
 if not Assigned(CmdBuffer) then Exit(False);

 CmdBuffer.EndRenderPass;
 CmdBuffer.Fence.Reset;

 //nothing recorded, nothing to submit
 if (CmdBuffer.cmd_count=0) then Exit(False);

 Result:=CmdBuffer.QueueSubmit;
end;
//Check whether a submitted command buffer is finished and dispose of it.
// CmdBuffer - buffer to test, nil counts as finished
//Returns True when the buffer is nil, has a non-zero ret, holds no
//commands, or its fence reports VK_SUCCESS. The fence is only queried
//when the first two checks do not already decide the result.
//A finished buffer has its resources released and is destroyed.
//NOTE: CmdBuffer is passed by value, so FreeAndNil clears only this local
//copy; the caller must drop its own reference after a True result.
function gfx_test(CmdBuffer:TvCmdBuffer):Boolean;
begin
 Result:=True;
 if not Assigned(CmdBuffer) then Exit;

 if (CmdBuffer.ret=0) and (CmdBuffer.cmd_count<>0) then
 begin
  //really submitted work, completion is decided by the fence
  Result:=(CmdBuffer.Fence.Status=VK_SUCCESS);
 end;

 if not Result then Exit;

 CmdBuffer.ReleaseResource;
 FreeAndNil(CmdBuffer);
 //GPU_REGS.ClearDMA;
end;
//Unit initialization section: executed once at program start-up.
//Calls GPU_REGS.InitDefault before any command buffer is parsed
//(presumably loads the default register values - confirm in ps4_gpu_regs).
initialization
GPU_REGS.InitDefault;
end.