mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-27 07:20:49 +00:00
Cleanup and clarify texture swizzling funcs.
This commit is contained in:
parent
b6778bf4ad
commit
3593a7963e
@ -673,13 +673,12 @@ int MediaEngine::writeVideoImage(u32 bufferPtr, int frameWidth, int videoPixelMo
|
||||
}
|
||||
|
||||
if (swizzle) {
|
||||
const u32 pitch = videoLineSize / 4;
|
||||
const int bxc = videoLineSize / 16;
|
||||
int byc = (height + 7) / 8;
|
||||
if (byc == 0)
|
||||
byc = 1;
|
||||
|
||||
DoSwizzleTex16((const u32 *)imgbuf, buffer, bxc, byc, pitch, videoLineSize);
|
||||
DoSwizzleTex16((const u32 *)imgbuf, buffer, bxc, byc, videoLineSize);
|
||||
delete [] imgbuf;
|
||||
}
|
||||
|
||||
@ -789,13 +788,12 @@ int MediaEngine::writeVideoImageWithRange(u32 bufferPtr, int frameWidth, int vid
|
||||
if (swizzle) {
|
||||
WARN_LOG_REPORT_ONCE(vidswizzle, ME, "Swizzling Video with range");
|
||||
|
||||
const u32 pitch = videoLineSize / 4;
|
||||
const int bxc = videoLineSize / 16;
|
||||
int byc = (height + 7) / 8;
|
||||
if (byc == 0)
|
||||
byc = 1;
|
||||
|
||||
DoSwizzleTex16((const u32 *)imgbuf, buffer, bxc, byc, pitch, videoLineSize);
|
||||
DoSwizzleTex16((const u32 *)imgbuf, buffer, bxc, byc, videoLineSize);
|
||||
delete [] imgbuf;
|
||||
}
|
||||
|
||||
|
@ -365,57 +365,24 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
|
||||
}
|
||||
|
||||
void TextureCacheCommon::UnswizzleFromMem(u32 *dest, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel) {
|
||||
// Note: bufw is always aligned to 16 bytes, so rowWidth is always >= 16.
|
||||
const u32 rowWidth = (bytesPerPixel > 0) ? (bufw * bytesPerPixel) : (bufw / 2);
|
||||
const u32 pitch = rowWidth / 4;
|
||||
// A visual mapping of unswizzling, where each letter is 16 bytes and 8 letters form a block:
|
||||
//
|
||||
// ABCDEFGH IJKLMNOP
|
||||
// ->
|
||||
// AI
|
||||
// BJ
|
||||
// CK
|
||||
// ...
|
||||
//
|
||||
// bxc is the number of blocks in the x direction, and byc the number in the y direction.
|
||||
const int bxc = rowWidth / 16;
|
||||
// The height is not always aligned to 8, but rounds up.
|
||||
int byc = (height + 7) / 8;
|
||||
if (byc == 0)
|
||||
byc = 1;
|
||||
|
||||
u32 ydest = 0;
|
||||
if (rowWidth >= 16) {
|
||||
// The most common one, so it gets an optimized implementation.
|
||||
DoUnswizzleTex16(texptr, dest, bxc, byc, pitch, rowWidth);
|
||||
} else if (rowWidth == 8) {
|
||||
const u32 *src = (const u32 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 8; n++, ydest += 2) {
|
||||
dest[ydest + 0] = *src++;
|
||||
dest[ydest + 1] = *src++;
|
||||
src += 2; // skip two u32
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 4) {
|
||||
const u32 *src = (const u32 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 8; n++, ydest++) {
|
||||
dest[ydest] = *src++;
|
||||
src += 3;
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 2) {
|
||||
const u16 *src = (const u16 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 4; n++, ydest++) {
|
||||
u16 n1 = src[0];
|
||||
u16 n2 = src[8];
|
||||
dest[ydest] = (u32)n1 | ((u32)n2 << 16);
|
||||
src += 16;
|
||||
}
|
||||
}
|
||||
} else if (rowWidth == 1) {
|
||||
const u8 *src = (const u8 *) texptr;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
for (int n = 0; n < 2; n++, ydest++) {
|
||||
u8 n1 = src[ 0];
|
||||
u8 n2 = src[16];
|
||||
u8 n3 = src[32];
|
||||
u8 n4 = src[48];
|
||||
dest[ydest] = (u32)n1 | ((u32)n2 << 8) | ((u32)n3 << 16) | ((u32)n4 << 24);
|
||||
src += 64;
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO: The rowWidth argument passed below acts as the dest pitch and could be changed to adjust it (leave the rowWidth used for bxc above as is).
|
||||
DoUnswizzleTex16(texptr, dest, bxc, byc, rowWidth);
|
||||
}
|
||||
|
||||
void *TextureCacheCommon::RearrangeBuf(void *inBuf, u32 inRowBytes, u32 outRowBytes, int h, bool allowInPlace) {
|
||||
|
@ -197,9 +197,14 @@ static u32 QuickTexHashBasic(const void *checkp, u32 size) {
|
||||
return check;
|
||||
}
|
||||
|
||||
void DoSwizzleTex16(const u32 *ysrcp, u8 *texptr, int bxc, int byc, u32 pitch, u32 rowWidth) {
|
||||
void DoSwizzleTex16(const u32 *ysrcp, u8 *texptr, int bxc, int byc, u32 pitch) {
|
||||
// ysrcp is in 32-bits, so this is convenient.
|
||||
const u32 pitchBy32 = pitch >> 2;
|
||||
#ifdef _M_SSE
|
||||
__m128i *dest = (__m128i *)texptr;
|
||||
// The pitch parameter is in bytes, so shift down for 128-bit.
|
||||
// Note: it's always aligned to 16 bytes, so this is safe.
|
||||
const u32 pitchBy128 = pitch >> 4;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
const __m128i *xsrc = (const __m128i *)ysrcp;
|
||||
for (int bx = 0; bx < bxc; bx++) {
|
||||
@ -207,13 +212,13 @@ void DoSwizzleTex16(const u32 *ysrcp, u8 *texptr, int bxc, int byc, u32 pitch, u
|
||||
for (int n = 0; n < 2; n++) {
|
||||
// Textures are always 16-byte aligned so this is fine.
|
||||
__m128i temp1 = _mm_load_si128(src);
|
||||
src += pitch >> 2;
|
||||
src += pitchBy128;
|
||||
__m128i temp2 = _mm_load_si128(src);
|
||||
src += pitch >> 2;
|
||||
src += pitchBy128;
|
||||
__m128i temp3 = _mm_load_si128(src);
|
||||
src += pitch >> 2;
|
||||
src += pitchBy128;
|
||||
__m128i temp4 = _mm_load_si128(src);
|
||||
src += pitch >> 2;
|
||||
src += pitchBy128;
|
||||
|
||||
_mm_store_si128(dest, temp1);
|
||||
_mm_store_si128(dest + 1, temp2);
|
||||
@ -223,7 +228,7 @@ void DoSwizzleTex16(const u32 *ysrcp, u8 *texptr, int bxc, int byc, u32 pitch, u
|
||||
}
|
||||
xsrc++;
|
||||
}
|
||||
ysrcp += (rowWidth * 8) / 4;
|
||||
ysrcp += pitchBy32 * 8;
|
||||
}
|
||||
#else
|
||||
u32 *dest = (u32 *)texptr;
|
||||
@ -233,19 +238,24 @@ void DoSwizzleTex16(const u32 *ysrcp, u8 *texptr, int bxc, int byc, u32 pitch, u
|
||||
const u32 *src = xsrc;
|
||||
for (int n = 0; n < 8; n++) {
|
||||
memcpy(dest, src, 16);
|
||||
src += pitch;
|
||||
src += pitchBy32;
|
||||
dest += 4;
|
||||
}
|
||||
xsrc += 4;
|
||||
}
|
||||
ysrcp += (rowWidth * 8) / 4;
|
||||
ysrcp += pitchBy32 * 8;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void DoUnswizzleTex16Basic(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch, u32 rowWidth) {
|
||||
void DoUnswizzleTex16Basic(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch) {
|
||||
// ydestp is in 32-bits, so this is convenient.
|
||||
const u32 pitchBy32 = pitch >> 2;
|
||||
#ifdef _M_SSE
|
||||
const __m128i *src = (const __m128i *)texptr;
|
||||
// The pitch parameter is in bytes, so shift down for 128-bit.
|
||||
// Note: it's always aligned to 16 bytes, so this is safe.
|
||||
const u32 pitchBy128 = pitch >> 4;
|
||||
for (int by = 0; by < byc; by++) {
|
||||
__m128i *xdest = (__m128i *)ydestp;
|
||||
for (int bx = 0; bx < bxc; bx++) {
|
||||
@ -257,18 +267,18 @@ void DoUnswizzleTex16Basic(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32
|
||||
__m128i temp3 = _mm_load_si128(src + 2);
|
||||
__m128i temp4 = _mm_load_si128(src + 3);
|
||||
_mm_store_si128(dest, temp1);
|
||||
dest += pitch >> 2;
|
||||
dest += pitchBy128;
|
||||
_mm_store_si128(dest, temp2);
|
||||
dest += pitch >> 2;
|
||||
dest += pitchBy128;
|
||||
_mm_store_si128(dest, temp3);
|
||||
dest += pitch >> 2;
|
||||
dest += pitchBy128;
|
||||
_mm_store_si128(dest, temp4);
|
||||
dest += pitch >> 2;
|
||||
dest += pitchBy128;
|
||||
src += 4;
|
||||
}
|
||||
xdest++;
|
||||
}
|
||||
ydestp += (rowWidth * 8) / 4;
|
||||
ydestp += pitchBy32 * 8;
|
||||
}
|
||||
#else
|
||||
const u32 *src = (const u32 *)texptr;
|
||||
@ -278,12 +288,12 @@ void DoUnswizzleTex16Basic(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32
|
||||
u32 *dest = xdest;
|
||||
for (int n = 0; n < 8; n++) {
|
||||
memcpy(dest, src, 16);
|
||||
dest += pitch;
|
||||
dest += pitchBy32;
|
||||
src += 4;
|
||||
}
|
||||
xdest += 4;
|
||||
}
|
||||
ydestp += (rowWidth * 8) / 4;
|
||||
ydestp += pitchBy32 * 8;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -32,14 +32,16 @@ enum CheckAlphaResult {
|
||||
|
||||
void SetupTextureDecoder();
|
||||
|
||||
void DoSwizzleTex16(const u32 *ysrcp, u8 *texptr, int bxc, int byc, u32 pitch, u32 rowWidth);
|
||||
// Pitch is in bytes and must be aligned to 16 bytes (as is the case on a PSP)
|
||||
void DoSwizzleTex16(const u32 *ysrcp, u8 *texptr, int bxc, int byc, u32 pitch);
|
||||
|
||||
// For SSE, we statically link the SSE2 algorithms.
|
||||
#if defined(_M_SSE)
|
||||
u32 QuickTexHashSSE2(const void *checkp, u32 size);
|
||||
#define DoQuickTexHash QuickTexHashSSE2
|
||||
|
||||
void DoUnswizzleTex16Basic(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch, u32 rowWidth);
|
||||
// Pitch is in bytes and must be aligned to 16 bytes (as is the case on a PSP)
|
||||
void DoUnswizzleTex16Basic(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch);
|
||||
#define DoUnswizzleTex16 DoUnswizzleTex16Basic
|
||||
|
||||
#include "ext/xxhash.h"
|
||||
@ -70,7 +72,7 @@ typedef u64 ReliableHashType;
|
||||
typedef u32 (*QuickTexHashFunc)(const void *checkp, u32 size);
|
||||
extern QuickTexHashFunc DoQuickTexHash;
|
||||
|
||||
typedef void (*UnswizzleTex16Func)(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch, u32 rowWidth);
|
||||
typedef void (*UnswizzleTex16Func)(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch);
|
||||
extern UnswizzleTex16Func DoUnswizzleTex16;
|
||||
|
||||
typedef u32 (*ReliableHash32Func)(const void *input, size_t len, u32 seed);
|
||||
|
Loading…
Reference in New Issue
Block a user