Swizzle video texture data on write to mirrors.

This commit is contained in:
Unknown W. Brackets 2015-06-13 13:03:59 -07:00
parent edb55f5bea
commit 401bd35691
3 changed files with 106 additions and 19 deletions

View File

@ -21,6 +21,7 @@
#include "Core/MemMap.h"
#include "Core/MIPS/MIPS.h"
#include "Core/Reporting.h"
#include "GPU/Common/TextureDecoder.h"
#include "GPU/GPUInterface.h"
#include "Core/HW/SimpleAudioDec.h"
@ -610,48 +611,62 @@ int MediaEngine::writeVideoImage(u32 bufferPtr, int frameWidth, int videoPixelMo
#ifdef USE_FFMPEG
if (!m_pFrame || !m_pFrameRGB)
return 0;
int videoImageSize = 0;
// lock the image size
int height = m_desHeight;
int width = m_desWidth;
u8 *imgbuf = buffer;
const u8 *data = m_pFrameRGB->data[0];
int videoLineSize = 0;
switch (videoPixelMode) {
case GE_CMODE_32BIT_ABGR8888:
videoLineSize = frameWidth * sizeof(u32);
break;
case GE_CMODE_16BIT_BGR5650:
case GE_CMODE_16BIT_ABGR5551:
case GE_CMODE_16BIT_ABGR4444:
videoLineSize = frameWidth * sizeof(u16);
break;
}
int videoImageSize = videoLineSize * height;
bool swizzle = (bufferPtr & 0x00200000) == 0x00200000;
if (swizzle) {
imgbuf = new u8[videoImageSize];
}
switch (videoPixelMode) {
case GE_CMODE_32BIT_ABGR8888:
for (int y = 0; y < height; y++) {
writeVideoLineRGBA(imgbuf, data, width);
data += width * sizeof(u32);
imgbuf += frameWidth * sizeof(u32);
imgbuf += videoLineSize;
}
videoImageSize = frameWidth * sizeof(u32) * height;
break;
case GE_CMODE_16BIT_BGR5650:
for (int y = 0; y < height; y++) {
writeVideoLineABGR5650(imgbuf, data, width);
data += width * sizeof(u16);
imgbuf += frameWidth * sizeof(u16);
imgbuf += videoLineSize;
}
videoImageSize = frameWidth * sizeof(u16) * height;
break;
case GE_CMODE_16BIT_ABGR5551:
for (int y = 0; y < height; y++) {
writeVideoLineABGR5551(imgbuf, data, width);
data += width * sizeof(u16);
imgbuf += frameWidth * sizeof(u16);
imgbuf += videoLineSize;
}
videoImageSize = frameWidth * sizeof(u16) * height;
break;
case GE_CMODE_16BIT_ABGR4444:
for (int y = 0; y < height; y++) {
writeVideoLineABGR4444(imgbuf, data, width);
data += width * sizeof(u16);
imgbuf += frameWidth * sizeof(u16);
imgbuf += videoLineSize;
}
videoImageSize = frameWidth * sizeof(u16) * height;
break;
default:
@ -659,6 +674,17 @@ int MediaEngine::writeVideoImage(u32 bufferPtr, int frameWidth, int videoPixelMo
break;
}
if (swizzle) {
const u32 pitch = videoLineSize / 4;
const int bxc = videoLineSize / 16;
int byc = (height + 7) / 8;
if (byc == 0)
byc = 1;
DoSwizzleTex16((const u32 *)imgbuf, buffer, bxc, byc, pitch, videoLineSize);
delete [] imgbuf;
}
#ifndef MOBILE_DEVICE
CBreakPoints::ExecMemCheck(bufferPtr, true, videoImageSize, currentMIPS->pc);
#endif
@ -680,11 +706,29 @@ int MediaEngine::writeVideoImageWithRange(u32 bufferPtr, int frameWidth, int vid
#ifdef USE_FFMPEG
if (!m_pFrame || !m_pFrameRGB)
return 0;
int videoImageSize = 0;
// lock the image size
u8 *imgbuf = buffer;
const u8 *data = m_pFrameRGB->data[0];
int videoLineSize = 0;
switch (videoPixelMode) {
case GE_CMODE_32BIT_ABGR8888:
videoLineSize = frameWidth * sizeof(u32);
break;
case GE_CMODE_16BIT_BGR5650:
case GE_CMODE_16BIT_ABGR5551:
case GE_CMODE_16BIT_ABGR4444:
videoLineSize = frameWidth * sizeof(u16);
break;
}
int videoImageSize = videoLineSize * height;
bool swizzle = (bufferPtr & 0x00200000) == 0x00200000;
if (swizzle) {
imgbuf = new u8[videoImageSize];
}
if (width > m_desWidth - xpos)
width = m_desWidth - xpos;
if (height > m_desHeight - ypos)
@ -696,12 +740,11 @@ int MediaEngine::writeVideoImageWithRange(u32 bufferPtr, int frameWidth, int vid
for (int y = 0; y < height; y++) {
writeVideoLineRGBA(imgbuf, data, width);
data += m_desWidth * sizeof(u32);
imgbuf += frameWidth * sizeof(u32);
imgbuf += videoLineSize;
#ifndef MOBILE_DEVICE
CBreakPoints::ExecMemCheck(bufferPtr + y * frameWidth * sizeof(u32), true, width * sizeof(u32), currentMIPS->pc);
#endif
}
videoImageSize = frameWidth * sizeof(u32) * m_desHeight;
break;
case GE_CMODE_16BIT_BGR5650:
@ -709,12 +752,11 @@ int MediaEngine::writeVideoImageWithRange(u32 bufferPtr, int frameWidth, int vid
for (int y = 0; y < height; y++) {
writeVideoLineABGR5650(imgbuf, data, width);
data += m_desWidth * sizeof(u16);
imgbuf += frameWidth * sizeof(u16);
imgbuf += videoLineSize;
#ifndef MOBILE_DEVICE
CBreakPoints::ExecMemCheck(bufferPtr + y * frameWidth * sizeof(u16), true, width * sizeof(u16), currentMIPS->pc);
#endif
}
videoImageSize = frameWidth * sizeof(u16) * m_desHeight;
break;
case GE_CMODE_16BIT_ABGR5551:
@ -722,12 +764,11 @@ int MediaEngine::writeVideoImageWithRange(u32 bufferPtr, int frameWidth, int vid
for (int y = 0; y < height; y++) {
writeVideoLineABGR5551(imgbuf, data, width);
data += m_desWidth * sizeof(u16);
imgbuf += frameWidth * sizeof(u16);
imgbuf += videoLineSize;
#ifndef MOBILE_DEVICE
CBreakPoints::ExecMemCheck(bufferPtr + y * frameWidth * sizeof(u16), true, width * sizeof(u16), currentMIPS->pc);
#endif
}
videoImageSize = frameWidth * sizeof(u16) * m_desHeight;
break;
case GE_CMODE_16BIT_ABGR4444:
@ -735,12 +776,11 @@ int MediaEngine::writeVideoImageWithRange(u32 bufferPtr, int frameWidth, int vid
for (int y = 0; y < height; y++) {
writeVideoLineABGR4444(imgbuf, data, width);
data += m_desWidth * sizeof(u16);
imgbuf += frameWidth * sizeof(u16);
imgbuf += videoLineSize;
#ifndef MOBILE_DEVICE
CBreakPoints::ExecMemCheck(bufferPtr + y * frameWidth * sizeof(u16), true, width * sizeof(u16), currentMIPS->pc);
#endif
}
videoImageSize = frameWidth * sizeof(u16) * m_desHeight;
break;
default:

View File

@ -161,6 +161,52 @@ static u32 QuickTexHashBasic(const void *checkp, u32 size) {
return check;
}
void DoSwizzleTex16(const u32 *ysrcp, u8 *texptr, int bxc, int byc, u32 pitch, u32 rowWidth) {
#ifdef _M_SSE
__m128i *dest = (__m128i *)texptr;
for (int by = 0; by < byc; by++) {
const __m128i *xsrc = (const __m128i *)ysrcp;
for (int bx = 0; bx < bxc; bx++) {
const __m128i *src = xsrc;
for (int n = 0; n < 2; n++) {
// Textures are always 16-byte aligned so this is fine.
__m128i temp1 = _mm_load_si128(src);
src += pitch >> 2;
__m128i temp2 = _mm_load_si128(src);
src += pitch >> 2;
__m128i temp3 = _mm_load_si128(src);
src += pitch >> 2;
__m128i temp4 = _mm_load_si128(src);
src += pitch >> 2;
_mm_store_si128(dest, temp1);
_mm_store_si128(dest + 1, temp2);
_mm_store_si128(dest + 2, temp3);
_mm_store_si128(dest + 3, temp4);
dest += 4;
}
xsrc++;
}
ysrcp += (rowWidth * 8) / 4;
}
#else
u32 *dest = (u32 *)texptr;
for (int by = 0; by < byc; by++) {
const u32 *xsrc = ysrcp;
for (int bx = 0; bx < bxc; bx++) {
const u32 *src = xsrc;
for (int n = 0; n < 8; n++) {
memcpy(dest, src, 16);
src += pitch;
dest += 4;
}
xsrc += 4;
}
ysrcp += (rowWidth * 8) / 4;
}
#endif
}
void DoUnswizzleTex16Basic(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch, u32 rowWidth) {
#ifdef _M_SSE
const __m128i *src = (const __m128i *)texptr;
@ -184,7 +230,7 @@ void DoUnswizzleTex16Basic(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32
dest += pitch >> 2;
src += 4;
}
xdest ++;
xdest++;
}
ydestp += (rowWidth * 8) / 4;
}

View File

@ -37,6 +37,7 @@ void SetupTextureDecoder();
u32 QuickTexHashSSE2(const void *checkp, u32 size);
#define DoQuickTexHash QuickTexHashSSE2
// Swizzles linear rows starting at ysrcp into texptr as bxc * byc blocks, each
// 16 bytes wide by 8 rows tall (128 bytes per block, written consecutively).
// pitch is the source row stride in 32-bit words; rowWidth is the source row
// size in bytes.
void DoSwizzleTex16(const u32 *ysrcp, u8 *texptr, int bxc, int byc, u32 pitch, u32 rowWidth);
void DoUnswizzleTex16Basic(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch, u32 rowWidth);
#define DoUnswizzleTex16 DoUnswizzleTex16Basic