mirror of
https://github.com/libretro/RetroArch.git
synced 2024-11-25 00:49:47 +00:00
(3DS) video driver: performance improvements.
This commit is contained in:
parent
64e3e40bb6
commit
e754c328b0
@ -202,7 +202,7 @@ static void* ctr_init(const video_info_t* video,
|
||||
CTRGU_ATTRIBFMT(GPU_SHORT, 2) << 4,
|
||||
sizeof(ctr_vertex_t));
|
||||
GPUCMD_Finalize();
|
||||
GPUCMD_FlushAndRun(NULL);
|
||||
ctrGuFlushAndRun(true);
|
||||
gspWaitForEvent(GSPEVENT_P3D, false);
|
||||
|
||||
if (input && input_data)
|
||||
@ -214,7 +214,7 @@ static void* ctr_init(const video_info_t* video,
|
||||
|
||||
return ctr;
|
||||
}
|
||||
//#define gspWaitForEvent(...)
|
||||
|
||||
static bool ctr_frame(void* data, const void* frame,
|
||||
unsigned width, unsigned height, unsigned pitch, const char* msg)
|
||||
{
|
||||
@ -249,17 +249,17 @@ static bool ctr_frame(void* data, const void* frame,
|
||||
}
|
||||
|
||||
frames++;
|
||||
currentTick = osGetTime();
|
||||
currentTick = svcGetSystemTick();
|
||||
uint32_t diff = currentTick - lastTick;
|
||||
if(diff > 1000)
|
||||
if(diff > CTR_CPU_TICKS_PER_SECOND)
|
||||
{
|
||||
fps = (float)frames * (1000.0 / diff);
|
||||
fps = (float)frames * ((float) CTR_CPU_TICKS_PER_SECOND / (float) diff);
|
||||
lastTick = currentTick;
|
||||
frames = 0;
|
||||
}
|
||||
|
||||
printf("fps: %8.4f frames: %i\r", fps, total_frames++);
|
||||
fflush(stdout);
|
||||
// fflush(stdout);
|
||||
|
||||
/* enable this to profile the core without video output */
|
||||
#if 0
|
||||
@ -267,60 +267,83 @@ static bool ctr_frame(void* data, const void* frame,
|
||||
goto end;
|
||||
#endif
|
||||
|
||||
svcWaitSynchronization(gspEvents[GSPEVENT_P3D], 20000000);
|
||||
svcClearEvent(gspEvents[GSPEVENT_P3D]);
|
||||
svcWaitSynchronization(gspEvents[GSPEVENT_PPF], 20000000);
|
||||
svcClearEvent(gspEvents[GSPEVENT_PPF]);
|
||||
|
||||
gfxSwapBuffersGpu();
|
||||
|
||||
if (ctr->vsync)
|
||||
gspWaitForEvent(GSPEVENT_VBlank0, true);
|
||||
|
||||
ctrGuSetMemoryFill(true, (u32*)CTR_GPU_FRAMEBUFFER, 0x00000000,
|
||||
(u32*)(CTR_GPU_FRAMEBUFFER + CTR_TOP_FRAMEBUFFER_WIDTH * CTR_TOP_FRAMEBUFFER_HEIGHT * sizeof(uint32_t)),
|
||||
0x201, (u32*)CTR_GPU_DEPTHBUFFER, 0x00000000,
|
||||
(u32*)(CTR_GPU_DEPTHBUFFER + CTR_TOP_FRAMEBUFFER_WIDTH * CTR_TOP_FRAMEBUFFER_HEIGHT * sizeof(uint32_t)),
|
||||
0x201);
|
||||
|
||||
GPUCMD_SetBufferOffset(0);
|
||||
|
||||
if (width > ctr->texture_width)
|
||||
width = ctr->texture_width;
|
||||
if (height > ctr->texture_height)
|
||||
height = ctr->texture_height;
|
||||
|
||||
if(frame)
|
||||
{
|
||||
int i;
|
||||
uint16_t* dst = (uint16_t*)ctr->texture_linear;
|
||||
const uint8_t* src = frame;
|
||||
if (width > ctr->texture_width)
|
||||
width = ctr->texture_width;
|
||||
if (height > ctr->texture_height)
|
||||
height = ctr->texture_height;
|
||||
for (i = 0; i < height; i++)
|
||||
if(((((u32)(frame)) >= 0x14000000 && ((u32)(frame)) < 0x1c000000)) /* frame in linear memory */
|
||||
&& !((u32)frame & 0x7F) /* 128-byte aligned */
|
||||
&& !((pitch) & 0xF)) /* 16-byte aligned */
|
||||
{
|
||||
memcpy(dst, src, width * sizeof(uint16_t));
|
||||
dst += ctr->texture_width;
|
||||
src += pitch;
|
||||
/* can copy the buffer directly with the GPU */
|
||||
ctrGuCopyImage(false, frame, pitch / 2, height, CTRGU_RGB565, false,
|
||||
ctr->texture_swizzled, ctr->texture_width, CTRGU_RGB565, true);
|
||||
}
|
||||
GSPGPU_FlushDataCache(NULL, ctr->texture_linear,
|
||||
ctr->texture_width * ctr->texture_height * sizeof(uint16_t));
|
||||
else
|
||||
{
|
||||
int i;
|
||||
uint16_t* dst = (uint16_t*)ctr->texture_linear;
|
||||
const uint8_t* src = frame;
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
memcpy(dst, src, width * sizeof(uint16_t));
|
||||
dst += ctr->texture_width;
|
||||
src += pitch;
|
||||
}
|
||||
GSPGPU_FlushDataCache(NULL, ctr->texture_linear,
|
||||
ctr->texture_width * ctr->texture_height * sizeof(uint16_t));
|
||||
|
||||
ctrGuCopyImage(ctr->texture_linear, ctr->texture_width, ctr->menu.texture_height, CTRGU_RGB565, false,
|
||||
ctr->texture_swizzled, ctr->texture_width, CTRGU_RGB565, true);
|
||||
|
||||
gspWaitForEvent(GSPEVENT_PPF, false);
|
||||
|
||||
|
||||
ctrGuSetTexture(GPU_TEXUNIT0, VIRT_TO_PHYS(ctr->texture_swizzled), ctr->texture_width, ctr->texture_height,
|
||||
GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_MIN_FILTER(GPU_LINEAR) |
|
||||
GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_EDGE) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_EDGE),
|
||||
GPU_RGB565);
|
||||
|
||||
ctr->frame_coords->u = width;
|
||||
ctr->frame_coords->v = height;
|
||||
GSPGPU_FlushDataCache(NULL, (u8*)ctr->frame_coords, sizeof(ctr_vertex_t));
|
||||
|
||||
ctrGuSetAttributeBuffersAddress(VIRT_TO_PHYS(ctr->frame_coords));
|
||||
ctrGuSetVertexShaderFloatUniform(0, (float*)&ctr->scale_vector, 1);
|
||||
GPU_DrawArray(GPU_UNKPRIM, 1);
|
||||
ctrGuCopyImage(false, ctr->texture_linear, ctr->texture_width, ctr->menu.texture_height, CTRGU_RGB565, false,
|
||||
ctr->texture_swizzled, ctr->texture_width, CTRGU_RGB565, true);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
ctrGuSetTexture(GPU_TEXUNIT0, VIRT_TO_PHYS(ctr->texture_swizzled), ctr->texture_width, ctr->texture_height,
|
||||
GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_MIN_FILTER(GPU_LINEAR) |
|
||||
GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_EDGE) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_EDGE),
|
||||
GPU_RGB565);
|
||||
|
||||
ctr->frame_coords->u = width;
|
||||
ctr->frame_coords->v = height;
|
||||
GSPGPU_FlushDataCache(NULL, (u8*)ctr->frame_coords, sizeof(ctr_vertex_t));
|
||||
|
||||
ctrGuSetAttributeBuffersAddress(VIRT_TO_PHYS(ctr->frame_coords));
|
||||
ctrGuSetVertexShaderFloatUniform(0, (float*)&ctr->scale_vector, 1);
|
||||
GPU_DrawArray(GPU_UNKPRIM, 1);
|
||||
|
||||
if (ctr->menu_texture_enable)
|
||||
{
|
||||
|
||||
GSPGPU_FlushDataCache(NULL, ctr->menu.texture_linear,
|
||||
ctr->menu.texture_width * ctr->menu.texture_height * sizeof(uint16_t));
|
||||
|
||||
ctrGuCopyImage(ctr->menu.texture_linear, ctr->menu.texture_width, ctr->menu.texture_height, CTRGU_RGBA4444,false,
|
||||
ctrGuCopyImage(false, ctr->menu.texture_linear, ctr->menu.texture_width, ctr->menu.texture_height, CTRGU_RGBA4444,false,
|
||||
ctr->menu.texture_swizzled, ctr->menu.texture_width, CTRGU_RGBA4444, true);
|
||||
|
||||
gspWaitForEvent(GSPEVENT_PPF, false);
|
||||
|
||||
|
||||
ctrGuSetTexture(GPU_TEXUNIT0, VIRT_TO_PHYS(ctr->menu.texture_swizzled), ctr->menu.texture_width, ctr->menu.texture_height,
|
||||
GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_MIN_FILTER(GPU_LINEAR) |
|
||||
GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_EDGE) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_EDGE),
|
||||
@ -334,27 +357,14 @@ static bool ctr_frame(void* data, const void* frame,
|
||||
|
||||
GPU_FinishDrawing();
|
||||
GPUCMD_Finalize();
|
||||
GPUCMD_FlushAndRun(NULL);
|
||||
gspWaitForEvent(GSPEVENT_P3D, false);
|
||||
ctrGuFlushAndRun(true);
|
||||
|
||||
ctrGuDisplayTransfer(CTR_GPU_FRAMEBUFFER, 240,400, CTRGU_RGBA8,
|
||||
ctrGuDisplayTransfer(true, CTR_GPU_FRAMEBUFFER, 240,400, CTRGU_RGBA8,
|
||||
gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), 240,400,CTRGU_RGB8, CTRGU_MULTISAMPLE_NONE);
|
||||
|
||||
gspWaitForEvent(GSPEVENT_PPF, false);
|
||||
|
||||
GX_SetMemoryFill(NULL, (u32*)CTR_GPU_FRAMEBUFFER, 0x00000000,
|
||||
(u32*)(CTR_GPU_FRAMEBUFFER + CTR_TOP_FRAMEBUFFER_WIDTH * CTR_TOP_FRAMEBUFFER_HEIGHT * sizeof(uint32_t)),
|
||||
0x201, (u32*)CTR_GPU_DEPTHBUFFER, 0x00000000,
|
||||
(u32*)(CTR_GPU_DEPTHBUFFER + CTR_TOP_FRAMEBUFFER_WIDTH * CTR_TOP_FRAMEBUFFER_HEIGHT * sizeof(uint32_t)),
|
||||
0x201);
|
||||
|
||||
gspWaitForEvent(GSPEVENT_PSC0, false);
|
||||
gfxSwapBuffersGpu();
|
||||
|
||||
// if (ctr->vsync)
|
||||
// gspWaitForEvent(GSPEVENT_VBlank0, true);
|
||||
|
||||
end:
|
||||
// gspWaitForEvent(GSPEVENT_VBlank0, true);
|
||||
RARCH_PERFORMANCE_STOP(ctrframe_f);
|
||||
return true;
|
||||
}
|
||||
|
@ -48,36 +48,13 @@
|
||||
#define CTRGU_MULTISAMPLE_2x1 (1 << 24)
|
||||
#define CTRGU_MULTISAMPLE_2x2 (2 << 24)
|
||||
|
||||
/* Raw GX command: eight 32-bit words, filled in by the ctrGuWrite*Command
 * helpers and handed to ctrGuSubmitGxCommand(). */
typedef struct { uint32_t buffer[8]; } gtrgu_gx_command_t;
|
||||
/* Ticks per second of the counter read with svcGetSystemTick().
 * ctr_frame() compares tick deltas against this value to detect that one
 * second has elapsed and to scale its frames-per-second figure. */
#define CTR_CPU_TICKS_PER_SECOND 268123480
|
||||
|
||||
__attribute__((always_inline))
|
||||
static INLINE int ctrGuWriteDisplayTransferCommand(gtrgu_gx_command_t* command,
|
||||
void* src, int src_w, int src_h,
|
||||
void* dst, int dst_w, int dst_h,
|
||||
uint32_t flags)
|
||||
{
|
||||
command->buffer[0] = 0x03; //CommandID
|
||||
command->buffer[1] = (uint32_t)src;
|
||||
command->buffer[2] = (uint32_t)dst;
|
||||
command->buffer[3] = CTRGU_SIZE(src_w, src_h);
|
||||
command->buffer[4] = CTRGU_SIZE(dst_w, dst_h);
|
||||
command->buffer[5] = flags;
|
||||
command->buffer[6] = 0x0;
|
||||
command->buffer[7] = 0x0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
__attribute__((always_inline))
|
||||
static INLINE int ctrGuSubmitGxCommand(u32* gxbuf, gtrgu_gx_command_t* command)
|
||||
{
|
||||
if(!gxbuf) gxbuf = gxCmdBuf;
|
||||
|
||||
return GSPGPU_SubmitGxCommand(gxbuf, (u32*)command, NULL);
|
||||
}
|
||||
extern Handle gspEvents[GSPEVENT_MAX];
|
||||
extern u32* gpuCmdBuf;
|
||||
extern u32 gpuCmdBufOffset;
|
||||
extern u32 __linear_heap_size;
|
||||
extern u32* __linear_heap;
|
||||
|
||||
__attribute__((always_inline))
|
||||
static INLINE void ctrGuSetTexture(GPU_TEXUNIT unit, u32* data,
|
||||
@ -108,14 +85,68 @@ static INLINE void ctrGuSetTexture(GPU_TEXUNIT unit, u32* data,
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((always_inline))
|
||||
static INLINE Result ctrGuSetCommandList_First(bool queued, u32* buf0a, u32 buf0s, u32* buf1a, u32 buf1s, u32* buf2a, u32 buf2s)
|
||||
{
|
||||
u32 gxCommand[0x8];
|
||||
gxCommand[0]=0x05 | (queued? 0x01000000 : 0x0); //CommandID
|
||||
gxCommand[1]=(u32)buf0a; //buf0 address
|
||||
gxCommand[2]=(u32)buf0s; //buf0 size
|
||||
gxCommand[3]=(u32)buf1a; //buf1 address
|
||||
gxCommand[4]=(u32)buf1s; //buf1 size
|
||||
gxCommand[5]=(u32)buf2a; //buf2 address
|
||||
gxCommand[6]=(u32)buf2s; //buf2 size
|
||||
gxCommand[7]=0x0;
|
||||
|
||||
return GSPGPU_SubmitGxCommand(gxCmdBuf, gxCommand, NULL);
|
||||
}
|
||||
|
||||
__attribute__((always_inline))
|
||||
static INLINE Result ctrGuSetCommandList_Last(bool queued, u32* buf0a, u32 buf0s, u8 flags)
|
||||
{
|
||||
u32 gxCommand[0x8];
|
||||
gxCommand[0]=0x01 | (queued? 0x01000000 : 0x0); //CommandID
|
||||
gxCommand[1]=(u32)buf0a; //buf0 address
|
||||
gxCommand[2]=(u32)buf0s; //buf0 size
|
||||
gxCommand[3]=flags&1; //written to GSP module state
|
||||
gxCommand[4]=gxCommand[5]=gxCommand[6]=0x0;
|
||||
gxCommand[7]=(flags>>1)&1; //when non-zero, call svcFlushProcessDataCache() with the specified buffer
|
||||
|
||||
return GSPGPU_SubmitGxCommand(gxCmdBuf, gxCommand, NULL);
|
||||
}
|
||||
|
||||
__attribute__((always_inline))
|
||||
static INLINE void ctrGuFlushAndRun(bool queued)
|
||||
{
|
||||
//take advantage of GX_SetCommandList_First to flush gsp heap
|
||||
ctrGuSetCommandList_First(queued, gpuCmdBuf, gpuCmdBufOffset*4, __linear_heap, __linear_heap_size, NULL, 0);
|
||||
ctrGuSetCommandList_Last(queued, gpuCmdBuf, gpuCmdBufOffset*4, 0x0);
|
||||
}
|
||||
|
||||
__attribute__((always_inline))
|
||||
static INLINE Result ctrGuSetMemoryFill(bool queued, u32* buf0a, u32 buf0v, u32* buf0e, u16 width0, u32* buf1a, u32 buf1v, u32* buf1e, u16 width1)
|
||||
{
|
||||
u32 gxCommand[0x8];
|
||||
gxCommand[0]=0x02 | (queued? 0x01000000 : 0x0); //CommandID
|
||||
gxCommand[1]=(u32)buf0a; //buf0 address
|
||||
gxCommand[2]=buf0v; //buf0 value
|
||||
gxCommand[3]=(u32)buf0e; //buf0 end addr
|
||||
gxCommand[4]=(u32)buf1a; //buf1 address
|
||||
gxCommand[5]=buf1v; //buf1 value
|
||||
gxCommand[6]=(u32)buf1e; //buf1 end addr
|
||||
gxCommand[7]=(width0)|(width1<<16);
|
||||
|
||||
return GSPGPU_SubmitGxCommand(gxCmdBuf, gxCommand, NULL);
|
||||
}
|
||||
|
||||
__attribute__((always_inline))
|
||||
static INLINE Result ctrGuCopyImage
|
||||
(void* src, int src_w, int src_h, int src_fmt, bool src_is_tiled,
|
||||
void* dst, int dst_w, int dst_fmt, bool dst_is_tiled)
|
||||
(bool queued,
|
||||
const void* src, int src_w, int src_h, int src_fmt, bool src_is_tiled,
|
||||
void* dst, int dst_w, int dst_fmt, bool dst_is_tiled)
|
||||
{
|
||||
u32 gxCommand[0x8];
|
||||
gxCommand[0]=0x03; //CommandID
|
||||
gxCommand[0]=0x03 | (queued? 0x01000000 : 0x0); //CommandID
|
||||
gxCommand[1]=(u32)src;
|
||||
gxCommand[2]=(u32)dst;
|
||||
gxCommand[3]=dst_w&0xFF8;
|
||||
@ -133,11 +164,12 @@ static INLINE Result ctrGuCopyImage
|
||||
|
||||
__attribute__((always_inline))
|
||||
static INLINE Result ctrGuDisplayTransfer
|
||||
(void* src, int src_w, int src_h, int src_fmt,
|
||||
(bool queued,
|
||||
void* src, int src_w, int src_h, int src_fmt,
|
||||
void* dst, int dst_w, int dst_h, int dst_fmt, int multisample_lvl)
|
||||
{
|
||||
u32 gxCommand[0x8];
|
||||
gxCommand[0]=0x03; //CommandID
|
||||
gxCommand[0]=0x03 | (queued? 0x01000000 : 0x0); //CommandID
|
||||
gxCommand[1]=(u32)src;
|
||||
gxCommand[2]=(u32)dst;
|
||||
gxCommand[3]=CTRGU_SIZE(dst_w, dst_h);
|
||||
|
Loading…
Reference in New Issue
Block a user