This commit is contained in:
Pavel 2022-10-12 15:41:51 +03:00
parent 28fffc6fd9
commit 640e7d1a36
9 changed files with 252 additions and 51 deletions

View File

@ -594,24 +594,40 @@ type
PPM4CMDDRAWINDEX2=^TPM4CMDDRAWINDEX2;
TPM4CMDDRAWINDEX2=packed record
maxSize:DWORD; // VGT_DMA_MAX_SIZE
indexBaseLo:DWORD; // VGT_DMA_BASE
indexBaseHi:DWORD; // VGT_DMA_BASE_HI
indexCount:DWORD; // VGT_DMA_SIZE ,VGT_NUM_INDICES
maxSize :DWORD; // VGT_DMA_MAX_SIZE
indexBaseLo :DWORD; // VGT_DMA_BASE
indexBaseHi :DWORD; // VGT_DMA_BASE_HI
indexCount :DWORD; // VGT_DMA_SIZE ,VGT_NUM_INDICES
drawInitiator:TVGT_DRAW_INITIATOR;
end;
PPM4CMDDRAWINDEXAUTO=^TPM4CMDDRAWINDEXAUTO;
TPM4CMDDRAWINDEXAUTO=packed record
indexCount:DWORD; ///< max index count
indexCount :DWORD; ///< max index count
drawInitiator:TVGT_DRAW_INITIATOR;
end;
// Payload of the index-base PM4 packet: a 32-bit low address word, a 16-bit
// high address word, and 16 further bits split into reserved1 and baseSelect.
PPM4CMDDRAWINDEXBASE=^TPM4CMDDRAWINDEXBASE;
TPM4CMDDRAWINDEXBASE=bitpacked record
indexBaseLo:DWORD; ///< Base Address Lo of index buffer, must be 2 byte aligned
indexBaseHi:Word; ///< Base Address Hi of index buffer
reserved1 :bit14; ///< reserved
baseSelect :bit2; ///< Base Address select mode
end;
// Payload of the draw-index-offset-2 PM4 packet: an indexed draw that begins
// indexOffset indices into the index buffer instead of at its start.
PPM4CMDDRAWINDEXOFFSET2=^TPM4CMDDRAWINDEXOFFSET2;
TPM4CMDDRAWINDEXOFFSET2=packed record
maxSize :DWORD; ///< maximum number of indices
indexOffset :DWORD; ///< zero based starting index number
indexCount :DWORD; ///< number of indices in the Index Buffer
drawInitiator:TVGT_DRAW_INITIATOR;
end;
PPM4CMDDISPATCHDIRECT=^TPM4CMDDISPATCHDIRECT;
TPM4CMDDISPATCHDIRECT=packed record
dimX:DWORD; ///< X dimensions of the array of thread groups to be dispatched
dimY:DWORD; ///< Y dimensions of the array of thread groups to be dispatched
dimZ:DWORD; ///< Z dimensions of the array of thread groups to be dispatched
dimX :DWORD; ///< X dimensions of the array of thread groups to be dispatched
dimY :DWORD; ///< Y dimensions of the array of thread groups to be dispatched
dimZ :DWORD; ///< Z dimensions of the array of thread groups to be dispatched
dispatchInitiator:TCOMPUTE_DISPATCH_INITIATOR; ///< Dispatch Initiator Register
end;

View File

@ -2337,6 +2337,31 @@ begin
//GFXMicroEngine.PushCmd(GFXRing.CmdBuffer);
end;
//IT_INDEX_BASE handler: stores the index buffer base address carried by the
//packet into GPU_REGS.VGT_DMA (BASE_LO/BASE_HI). Nothing is drawn here.
// pm4Hdr - packet header, not read by this handler
// Body   - packet payload; only indexBaseLo and indexBaseHi are read
//NOTE(review): Body^.baseSelect is not consulted - confirm that ignoring the
//select mode is intended.
procedure onIndexBase(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDDRAWINDEXBASE);
begin
GPU_REGS.VGT_DMA.BASE_LO:=Body^.indexBaseLo;
GPU_REGS.VGT_DMA.BASE_HI:=Body^.indexBaseHi;
end;
//IT_DRAW_INDEX_OFFSET_2 handler.
//Copies the size fields of the packet into GPU_REGS.VGT_DMA and, when
//UpdateGpuRegsInfo returns True, records an indexed draw that starts
//Body^.indexOffset indices into the buffer addressed by VGT_DMA.BASE_LO/BASE_HI.
// pm4Hdr - packet header, not read by this handler
// Body   - packet payload (maxSize, indexOffset, indexCount)
procedure onDrawIndexOffset2(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDDRAWINDEXOFFSET2);
var
 IndexBuf:Pointer;
begin
 //indexCount feeds both the DMA size and the number of indices to draw
 GPU_REGS.VGT_DMA.MAX_SIZE:=Body^.maxSize;
 GPU_REGS.VGT_DMA.SIZE    :=Body^.indexCount;
 GPU_REGS.VGT_DMA.INDICES :=Body^.indexCount;

 if UpdateGpuRegsInfo then
 begin
  IndexBuf:=getIndexAddress(GPU_REGS.VGT_DMA.BASE_LO,GPU_REGS.VGT_DMA.BASE_HI);

  GFXRing.CmdBuffer.DrawIndexOffset2(
   IndexBuf,
   Body^.indexOffset,
   GPU_REGS.VGT_DMA.INDICES,
   GPU_REGS.GET_INDEX_TYPE);
 end;

 {$ifdef ww}Writeln('DrawIndexOffset2:',Body^.indexOffset,' ',Body^.indexCount);{$endif}
end;
procedure onDispatchDirect(pm4Hdr:PM4_TYPE_3_HEADER;Body:PPM4CMDDISPATCHDIRECT);
begin
@ -2439,6 +2464,7 @@ begin
{$ifdef ww}Writeln('IT_SET_UCONFIG_REG');{$endif}
onSetUConfigReg(PM4_TYPE_3_HEADER(token),@PDWORD(P)[1]);
end;
IT_INDEX_TYPE:
begin
{$ifdef ww}Writeln('IT_INDEX_TYPE');{$endif}
@ -2454,6 +2480,17 @@ begin
{$ifdef ww}Writeln('IT_DRAW_INDEX_AUTO');{$endif}
onDrawIndexAuto(PM4_TYPE_3_HEADER(token),@PDWORD(P)[1]);
end;
IT_INDEX_BASE:
begin
{$ifdef ww}Writeln('IT_INDEX_BASE');{$endif}
onIndexBase(PM4_TYPE_3_HEADER(token),@PDWORD(P)[1]);
end;
IT_DRAW_INDEX_OFFSET_2:
begin
{$ifdef ww}Writeln('IT_DRAW_INDEX_OFFSET_2');{$endif}
onDrawIndexOffset2(PM4_TYPE_3_HEADER(token),@PDWORD(P)[1]);
end;
IT_DISPATCH_DIRECT:
begin
{$ifdef ww}Writeln('IT_DISPATCH_DIRECT');{$endif}
@ -2486,7 +2523,7 @@ begin
else
begin
Writeln('PM4_TYPE_3.opcode:',HexStr(PM4_TYPE_3_HEADER(token).opcode,2));
Writeln('PM4_TYPE_3.opcode:0x',HexStr(PM4_TYPE_3_HEADER(token).opcode,2));
Assert(False);
end;
end;

View File

@ -0,0 +1,95 @@
#version 450
layout (local_size_x = 16, local_size_y = 16) in;
layout (binding = 0) readonly buffer Host
{
uint data[];
} host;
layout (binding = 1, rgba8) writeonly uniform image2D resultImage;
layout (push_constant) uniform constants
{
vec4 gamma;
ivec2 pitch;
} cfg;
const ivec2 sh02 = ivec2(0,2);
const ivec4 sh1212 = ivec4(1,2,1,2);
const ivec4 sh1345 = ivec4(1,3,4,5);
const ivec3 sh345 = ivec3(3,4,5);
const ivec3 sh678 = ivec3(6,7,8);
const ivec4 sh6789 = ivec4(6,7,8,9);
const ivec4 sh6543 = ivec4(6,5,4,3);
const ivec4 sh9101112 = ivec4(9,10,11,12);
const ivec2 i2_1 = ivec2(1,1);
const ivec3 i3_1 = ivec3(1,1,1);
const ivec4 i4_1 = ivec4(1,1,1,1);
// Bits [0..5] of the source offset: position of the pixel inside its 8x8 block.
// Bit layout, least significant first: x0, x1, y0, x2, y1, y2.
int getElementIndex(ivec2 p) //[0..5]
{
 int x = p.x;
 int y = p.y;

 int idx = (x & 1);              // bit 0 <- x bit 0
 idx |= ((x >> 1) & 1) << 1;     // bit 1 <- x bit 1
 idx |= (y & 1) << 2;            // bit 2 <- y bit 0
 idx |= ((x >> 2) & 1) << 3;     // bit 3 <- x bit 2
 idx |= ((y >> 1) & 1) << 4;     // bit 4 <- y bit 1
 idx |= ((y >> 2) & 1) << 5;     // bit 5 <- y bit 2

 return idx;
}
// Bits [6..8] of the source offset, each an XOR of coordinate bits:
//   bit 6 = x3 ^ y3 ^ x4
//   bit 7 = x4 ^ y4
//   bit 8 = x5 ^ y5
int getPipeIndex(ivec2 p) //[6..8]
{
 int x = p.x;
 int y = p.y;

 int b6 = ((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 1;
 int b7 = ((x >> 4) ^ (y >> 4)) & 1;
 int b8 = ((x >> 5) ^ (y >> 5)) & 1;

 return (b6 << 6) | (b7 << 7) | (b8 << 8);
}
// Bits [9..12] of the source offset, each an XOR of coordinate bits:
//   bit  9 = x6 ^ y6
//   bit 10 = x7 ^ y5 ^ y6
//   bit 11 = x8 ^ y4
//   bit 12 = x9 ^ y3
int getBankIndex(ivec2 p) //[9..12]
{
 int x = p.x;
 int y = p.y;

 int b9  = ((x >> 6) ^ (y >> 6)) & 1;
 int b10 = ((x >> 7) ^ (y >> 5) ^ (y >> 6)) & 1;
 int b11 = ((x >> 8) ^ (y >> 4)) & 1;
 int b12 = ((x >> 9) ^ (y >> 3)) & 1;

 return (b9 << 9) | (b10 << 10) | (b11 << 11) | (b12 << 12);
}
// Entry point: one invocation per destination pixel.
// Builds the tiled source offset for the pixel, fetches the packed 32-bit texel
// from host.data, raises it to the cfg.gamma exponent and stores it in resultImage.
// NOTE(review): there is no bounds check against the image or buffer size here;
// presumably the host sizes the dispatch and buffer to whole tiles - confirm.
void main()
{
ivec2 pixelCoords = ivec2(gl_GlobalInvocationID.xy);
// low 13 bits of the offset, taken from three separate bit ranges ([0..5], [6..8], [9..12])
int element_index=getElementIndex(pixelCoords);
int pipe=getPipeIndex(pixelCoords);
int bank=getBankIndex(pixelCoords);
//const ivec4 shmt = ivec4(7,6,6,3);
const ivec2 shmt = ivec2(7,6);
//const ivec2 bmod = ivec2(1,1);
//ivec4 mt=(pixelCoords.xyxy >> shmt);
// mt = (x / 128, y / 64): coordinates of the 128x64 block containing the pixel
ivec2 mt=(pixelCoords.xy >> shmt);
// each block coordinate is scaled by its own cfg.pitch component
ivec2 total_offset=(mt.xy*cfg.pitch);
//+(mt.zw % bmod);
// the block number occupies the bits above the 13 in-block bits (1<<13 = 128*64 elements)
int offset = element_index | pipe | bank | ((total_offset.x+total_offset.y) << 13);
uint pack=host.data[offset];
////const uvec4 shift = uvec4(0,8,16,24);
////const uvec4 mask4 = uvec4(255,255,255,255);
////uvec4 pix_int=(uvec4(pack,pack,pack,pack) >> shift) & mask4;
//0,8,16,24
//RGBA
////vec4 pixel = vec4(pix_int) / mask4;
// unpackUnorm4x8 maps the least significant byte to .x and normalizes each byte to [0,1]
vec4 pixel=unpackUnorm4x8(pack);
// identity swizzle: the channel order is kept exactly as unpacked
pixel=pixel.rgba;
// per-channel power with the exponents supplied in the push constants
pixel = pow(pixel, cfg.gamma);
imageStore(resultImage, pixelCoords, pixel);
}

Binary file not shown.

View File

@ -1,9 +1,9 @@
@echo off
Set spirvgls=spirv\glslangValidator -g0 -V --target-env vulkan1.0
Set spirvgls=glslangValidator -g0 -V --target-env vulkan1.0
Set spirvopt=spirv\spirv-opt --eliminate-dead-branches --eliminate-local-multi-store --inline-entry-points-exhaustive --eliminate-dead-code-aggressive --scalar-replacement --simplify-instructions
Set spirvopt=spirv-opt --eliminate-dead-branches --eliminate-local-multi-store --inline-entry-points-exhaustive --eliminate-dead-code-aggressive --scalar-replacement --simplify-instructions
For /F %%a in ('dir /B') do if "%%~xa"==".comp" (call :compil %%a %%~na)

View File

@ -312,6 +312,7 @@ begin
Dec(rc);
if rc=0 then Break;
end;
end;
procedure TUserApp.OnTimer(Sender:TObject);
@ -326,7 +327,7 @@ var
Procedure App_Run;
begin
Timer:=TTimer.Create(nil);
Timer:=TTimer.Create(Application);
Timer.Interval:=10;
Timer.OnTimer:=@TUserApp(Application).OnTimer;
Timer.Enabled:=true;
@ -363,6 +364,8 @@ type
TVideoOut=class(TClassHandle)
FForm:TMyForm;
TVBlank:TTimer;
FGpuFlip:TvFlip;
FNodePos,FNodesUses:PtrUInt;
@ -408,15 +411,16 @@ type
function alloc_node:PQNode;
procedure free_node(n:PQNode);
procedure OnVblank(Sender:TObject);
procedure sceVideoOutOpen(node:PQNode);
procedure post_event_flip(flipArg:Int64);
procedure post_event_vblank(flipArg:Int64);
procedure post_event_vblank;
procedure sceVideoOutSubmitFlip(node:PQNode);
Constructor Create;
Destructor Destroy; override;
Destructor Destroy; override;
end;
procedure _on_free_kevent(data,userdata:Pointer);
@ -428,6 +432,7 @@ end;
Destructor TVideoOut.Destroy;
begin
FreeAndNil(FGpuFlip);
FreeAndNil(TVBlank);
FreeAndNil(FForm);
FlipEvents.LockWr;
HAMT_clear64(@FlipEvents.hamt,@_on_free_kevent,nil);
@ -491,6 +496,11 @@ begin
Message.Result:=1;
end;
//Timer callback (assigned to TVBlank.OnTimer): forwards every tick to
//post_event_vblank. Sender is not used.
procedure TVideoOut.OnVblank(Sender:TObject);
begin
post_event_vblank;
end;
procedure TVideoOut.sceVideoOutOpen(node:PQNode);
begin
@ -513,6 +523,11 @@ begin
FGpuFlip:=TvFlip.Create(FForm.Handle);
FGpuFlip.FNeoMode:=ps4_sceKernelIsNeoMode<>0;
TVBlank:=TTimer.Create(FForm);
TVBlank.Interval:=(1000 div 60); //59.94
TVBlank.OnTimer:=@OnVblank;
TVBlank.Enabled:=true;
//data? nop
free_node(node);
end;
@ -1118,6 +1133,17 @@ begin
_trigger_kevent_node(node,@_on_after,nil);
end;
//HAMT traversal callback that signals one registered vblank event.
// data     - the PKEventNode to trigger; nil entries are skipped
// userdata - an integer value passed through the pointer argument
//            (post_event_vblank passes its vblank count here)
//Only the low 48 bits of userdata are stored in ev.data before the node is
//triggered with _on_after as the completion callback.
procedure _on_trigger_blank(data,userdata:Pointer);
var
 node:PKEventNode;
begin
 node:=data;
 if (node=nil) then Exit;

 //keep the low 48 bits of the value carried in userdata
 node^.ev.data:=(ptruint(userdata) and $FFFFFFFFFFFF);

 _trigger_kevent_node(node,@_on_after,nil);
end;
procedure TVideoOut.post_event_flip(flipArg:Int64);
begin
//Writeln('post_event_flip');
@ -1126,11 +1152,24 @@ begin
FlipEvents.Unlock;
end;
procedure TVideoOut.post_event_vblank(flipArg:Int64);
procedure TVideoOut.post_event_vblank;
var
elap:QWORD;
count:QWORD;
time:DWORD;
hz:Byte;
begin
hz:=60; //59.94
time:=(1000000 div hz);
elap:=SwTimePassedUnits(VblankStatus.FTsc);
elap:=(elap+9) div 10;
count:=elap div time;
//Writeln('post_event_vblank');
VblankEvents.LockRd;
HAMT_traverse64(@VblankEvents.hamt,@_on_trigger_flip,Pointer(flipArg));
HAMT_traverse64(@VblankEvents.hamt,@_on_trigger_blank,Pointer(count));
VblankEvents.Unlock;
end;
@ -1267,13 +1306,13 @@ begin
if (bufferIndex=SCE_VIDEO_OUT_BUFFER_INDEX_BLANK) then
begin
post_event_flip(flipArg);
post_event_vblank(flipArg);
post_event_vblank;
end else
begin
System.InterlockedDecrement64(FLabels[bufferIndex]);
post_event_flip(flipArg);
post_event_vblank(flipArg);
post_event_vblank;
end;
Case _type of

View File

@ -148,6 +148,7 @@ type
Procedure dmaData(src:DWORD;dst:Pointer;byteCount:DWORD;isBlocking:Boolean);
Procedure writeAtEndOfShader(eventType:Byte;dst:Pointer;value:DWORD);
Procedure DrawIndexOffset2(Addr:Pointer;OFFSET,INDICES:DWORD;INDEX_TYPE:TVkIndexType);
Procedure DrawIndex2(Addr:Pointer;INDICES:DWORD;INDEX_TYPE:TVkIndexType);
Procedure DrawIndexAuto(INDICES:DWORD);
end;
@ -856,7 +857,7 @@ begin
end;
end;
Procedure TvCmdBuffer.DrawIndex2(Addr:Pointer;INDICES:DWORD;INDEX_TYPE:TVkIndexType);
Procedure TvCmdBuffer.DrawIndexOffset2(Addr:Pointer;OFFSET,INDICES:DWORD;INDEX_TYPE:TVkIndexType);
var
rb:TvHostBuffer;
Size:TVkDeviceSize;
@ -887,22 +888,26 @@ begin
begin
vkCmdDrawIndexed(
cmdbuf,
INDICES,
1,0,0,0);
INDICES, //indexCount
1, //instanceCount
OFFSET, //firstIndex
0, //vertexOffset
0); //firstInstance
end;
DI_PT_QUADLIST:
begin
Assert(OFFSET=0);
h:=INDICES div 4;
if (h>0) then h:=h-1;
For i:=0 to h do
begin
vkCmdDrawIndexed(
cmdbuf,
4,
1,
i*4,
0,
0);
4, //indexCount
1, //instanceCount
i*4, //firstIndex
0, //vertexOffset
0); //firstInstance
end;
end;
else
@ -911,6 +916,11 @@ begin
end;
//Indexed draw that starts at the first index of the buffer: delegates to
//DrawIndexOffset2 with an OFFSET of 0.
// Addr       - address of the index buffer
// INDICES    - number of indices to draw
// INDEX_TYPE - Vulkan index type of the buffer
Procedure TvCmdBuffer.DrawIndex2(Addr:Pointer;INDICES:DWORD;INDEX_TYPE:TVkIndexType);
begin
DrawIndexOffset2(Addr,0,INDICES,INDEX_TYPE);
end;
Procedure TvCmdBuffer.DrawIndexAuto(INDICES:DWORD);
var
i,h:DWORD;
@ -937,6 +947,7 @@ begin
}
//0 1 2
//0 2 3
h:=INDICES div 3;
if (h>0) then h:=h-1;
For i:=0 to h do
@ -944,11 +955,12 @@ begin
Inc(cmd_count);
vkCmdDraw(
cmdbuf,
4,
1,
0,
0);
4, //vertexCount
1, //instanceCount
0, //firstVertex
0); //firstInstance
end;
end;
//DI_PT_LINELOOP :;
DI_PT_QUADLIST :

View File

@ -342,14 +342,11 @@ begin
FreeAndNil(Ffilp_shader);
Ffilp_shader:=TvShaderCompute.Create;
//Ffilp_shader.FLocalSize.Create(64,64,1);
//Ffilp_shader.FLocalSize:=limits.maxComputeWorkGroupSize;
Ffilp_shader.LoadFromFile('shaders\FLIP_LINE_A8R8G8B8_SRGB.spv');
FPipelineFlip.SetShader(Ffilp_shader);
end;
else
Assert(false);
Assert(false,HexStr(format,8));
end;
SCE_VIDEO_OUT_TILING_MODE_TILE:
Case format of
@ -361,19 +358,6 @@ begin
FreeAndNil(Ffilp_shader);
Ffilp_shader:=TvShaderCompute.Create;
//Ffilp_shader.FLocalSize.Create(64,64,1);
{Ffilp_shader.FLocalSize:=limits.maxComputeWorkGroupSize;
Ffilp_shader.FLocalSize.z:=1;
While (Ffilp_shader.FLocalSize.x*Ffilp_shader.FLocalSize.y>limits.maxComputeWorkGroupInvocations) do
begin
if (Ffilp_shader.FLocalSize.x>Ffilp_shader.FLocalSize.y) then
Ffilp_shader.FLocalSize.x:=Ffilp_shader.FLocalSize.x div 2
else
Ffilp_shader.FLocalSize.y:=Ffilp_shader.FLocalSize.y div 2;
end;}
if FNeoMode then
begin
Ffilp_shader.LoadFromFile('shaders\FLIP_TILE_A8R8G8B8_SRGB_NEO.spv');
@ -382,12 +366,28 @@ begin
Ffilp_shader.LoadFromFile('shaders\FLIP_TILE_A8R8G8B8_SRGB.spv');
end;
//Ffilp_shader.FLocalSize.Create(16,16,1);
FPipelineFlip.SetShader(Ffilp_shader);
end;
SCE_VIDEO_OUT_PIXEL_FORMAT_A8B8G8R8_SRGB:
begin
Fformat:=format;
Ftmode:=tmode;
FlipQueue.WaitIdle;
FreeAndNil(Ffilp_shader);
Ffilp_shader:=TvShaderCompute.Create;
if FNeoMode then
begin
Assert(false,'TODO');
end else
begin
Ffilp_shader.LoadFromFile('shaders\FLIP_TILE_A8B8G8R8_SRGB.spv');
end;
FPipelineFlip.SetShader(Ffilp_shader);
end
else
Assert(false);
Assert(false,HexStr(format,8));
end;
else
Assert(false);

View File

@ -96,7 +96,7 @@ begin
mr:=t.GetRequirements;
Foffset:=0;
if not IsAlign(host.FOffset,mr.alignment) then
if (Size<mr.size) or (not IsAlign(host.FOffset,mr.alignment)) then
begin
pAlign:=AlignDw(host.FOffset,mr.alignment);
Foffset:=(host.FOffset-pAlign);
@ -104,6 +104,8 @@ begin
host.FOffset:=pAlign;
Size:=Size+Foffset;
if (Size<mr.size) then Size:=mr.size;
FreeAndNil(t);
t:=TvHostBuffer.Create(Size,usage,@buf_ext);