scummvm/backends/platform/psp/memory.cpp
2021-12-26 18:48:43 +01:00

404 lines
10 KiB
C++

/* ScummVM - Graphic Adventure Engine
*
* ScummVM is the legal property of its developers, whose names
* are too numerous to list here. Please refer to the COPYRIGHT
* file distributed with this source distribution.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "common/scummsys.h"
#include "common/singleton.h"
#include "backends/platform/psp/psppixelformat.h"
#define PSP_INCLUDE_SWAP
#include "backends/platform/psp/memory.h"
// Class Copier --------------------------------------------------------------------------
//#define __PSP_DEBUG_FUNCS__ /* For debugging the stack */
//#define __PSP_DEBUG_PRINT__
#include "backends/platform/psp/trace.h"
//#define TEST_MEMORY_COPY
/**
 * Copy 'bytes' bytes from src to dst, writing 32-bit words once dst has been
 * brought up to a 4-byte boundary.
 *
 * If dst is not word aligned, the 1-3 leading bytes needed to align it are
 * copied one at a time. The remainder is then handed to copy32Aligned() when
 * src is word aligned at that point, or to copy32Misaligned() otherwise.
 * When the request is too small for that to pay off (fewer than
 * MIN_AMOUNT_FOR_COMPLEX_COPY bytes would be left after the prefix), the
 * whole request is forwarded to copy8() instead.
 *
 * With TEST_MEMORY_COPY defined, the result is checked with testCopy().
 *
 * @param dst    destination buffer
 * @param src    source buffer
 * @param bytes  number of bytes to copy
 */
void PspMemory::copy(byte *dst, const byte *src, uint32 bytes) {
	DEBUG_ENTER_FUNC();

#ifdef TEST_MEMORY_COPY
	uint32 debugBytes = bytes;
	const byte *debugDst = dst, *debugSrc = src;
#endif

	PSP_DEBUG_PRINT("copy(): dst[%p], src[%p], bytes[%d]\n", dst, src, bytes);

	// align the destination pointer first
	uint32 prefixDst = (((uint32)dst) & 0x3);

	if (prefixDst) {
		prefixDst = 4 - prefixDst;	// prefix only if we have address % 4 != 0
		PSP_DEBUG_PRINT("prefixDst[%d]\n", prefixDst);

		// Decide before touching 'bytes': it is unsigned, so subtracting a
		// prefix larger than the request would wrap around to a huge count
		// and skip this fallback entirely.
		if (bytes < prefixDst || (bytes - prefixDst) < MIN_AMOUNT_FOR_COMPLEX_COPY) {
			// not worthwhile to continue -- plain byte copy of everything
			copy8(dst, src, bytes);
#ifdef TEST_MEMORY_COPY
			testCopy(debugDst, debugSrc, debugBytes);
#endif
			return;
		}

		bytes -= prefixDst;

		while (prefixDst--) {
			*dst++ = *src++;
		}
	}

	// dst is word aligned from here on; check the source pointer alignment now
	uint32 alignSrc = (((uint32)src) & 0x3);

	if (alignSrc) {	// we'll need to realign our reads
		copy32Misaligned((uint32 *)dst, src, bytes, alignSrc);
	} else {
		copy32Aligned((uint32 *)dst, (const uint32 *)src, bytes);
	}

#ifdef TEST_MEMORY_COPY
	testCopy(debugDst, debugSrc, debugBytes);
#endif
}
/**
 * Word-wise copy used when both pointers can be accessed as 32-bit words.
 *
 * The request is worked through in progressively smaller units: 32-byte
 * blocks, then at most one 16-byte block, then single words, and finally
 * any trailing bytes.
 *
 * @param dst32  destination, written as 32-bit words
 * @param src32  source, read as 32-bit words
 * @param bytes  total number of bytes to copy
 */
void PspMemory::copy32Aligned(uint32 *dst32, const uint32 *src32, uint32 bytes) {
	PSP_DEBUG_PRINT("copy32Aligned(): dst32[%p], src32[%p], bytes[%d]\n", dst32, src32, bytes);

	// 32-byte blocks; every group of four words is fully loaded before it
	// is stored
	for (uint32 blocks = bytes >> 5; blocks != 0; --blocks) {
		uint32 w0 = src32[0];
		uint32 w1 = src32[1];
		uint32 w2 = src32[2];
		uint32 w3 = src32[3];
		dst32[0] = w0;
		dst32[1] = w1;
		dst32[2] = w2;
		dst32[3] = w3;

		w0 = src32[4];
		w1 = src32[5];
		w2 = src32[6];
		w3 = src32[7];
		dst32[4] = w0;
		dst32[5] = w1;
		dst32[6] = w2;
		dst32[7] = w3;

		src32 += 8;
		dst32 += 8;
	}

	// bit 4 of the count tells us whether one more 16-byte block remains
	if (bytes & 0x10) {
		const uint32 w0 = src32[0];
		const uint32 w1 = src32[1];
		const uint32 w2 = src32[2];
		const uint32 w3 = src32[3];
		dst32[0] = w0;
		dst32[1] = w1;
		dst32[2] = w2;
		dst32[3] = w3;

		src32 += 4;
		dst32 += 4;
	}

	// zero to three whole words are left
	for (uint32 singles = (bytes >> 2) & 0x3; singles != 0; --singles) {
		*dst32++ = *src32++;
	}

	// zero to three bytes are left
	const int tailBytes = bytes & 0x3;
	PSP_DEBUG_PRINT("bytesLeft[%d]\n", tailBytes);

	byte *dst8 = (byte *)dst32;
	const byte *src8 = (const byte *)src32;

	for (int i = 0; i < tailBytes; ++i) {
		dst8[i] = src8[i];
	}
}
// More challenging -- need to shift
// Assume dst is aligned
//
// Copies 'bytes' bytes to dst32 from a source pointer that does not sit on a
// 4-byte boundary. The source is only ever read as whole 32-bit words,
// starting at src rounded down to a word boundary. Every output word is put
// together from two neighbouring source words: the current one shifted right
// by shiftValue, OR'ed with the next one shifted left by lastShiftValue
// (the two shifts always add up to 32). The shift directions take the
// low-order byte of a word to be the byte at the lowest address.
//
// dst32    - destination, written as 32-bit words
// src      - source pointer as given by the caller (not rounded down)
// bytes    - number of bytes to copy
// alignSrc - how many bytes src lies past a word boundary; 1 and 2 are
//            handled explicitly, any other value is treated as 3
void PspMemory::copy32Misaligned(uint32 *dst32, const byte *src, uint32 bytes, uint32 alignSrc) {
PSP_DEBUG_PRINT("copy32Misaligned: dst32[%p], src[%p], bytes[%d], alignSrc[%d]\n", dst32, src, bytes, alignSrc);
uint32 *src32 = (uint32 *)(((uint32)src) & 0xFFFFFFFC); // remove misalignment
uint32 shiftValue, lastShiftValue;
// shiftValue drops the bytes of a word that lie before src;
// lastShiftValue moves the start of the following word into the gap
switch (alignSrc) {
case 1:
shiftValue = 8;
lastShiftValue = 24;
break;
case 2:
shiftValue = 16;
lastShiftValue = 16;
break;
default: /* 3 */
shiftValue = 24;
lastShiftValue = 8;
break;
}
uint32 dstWord, srcWord;
// Try to do groups of 4 words
uint32 words4 = bytes >> 4;
srcWord = *src32; // preload 1st word so we read ahead
for (; words4; words4--) {
// each step: take what is left of the current source word, fetch the
// next one, and fill the rest of the output word from it
dstWord = srcWord >> shiftValue;
srcWord = src32[1];
dstWord |= srcWord << lastShiftValue;
dst32[0] = dstWord;
dstWord = srcWord >> shiftValue;
srcWord = src32[2];
dstWord |= srcWord << lastShiftValue;
dst32[1] = dstWord;
dstWord = srcWord >> shiftValue;
srcWord = src32[3];
dstWord |= srcWord << lastShiftValue;
dst32[2] = dstWord;
dstWord = srcWord >> shiftValue;
// src32[4] becomes src32[0] once the pointer is advanced below, so
// srcWord stays in step with src32 for the next iteration
srcWord = src32[4];
dstWord |= srcWord << lastShiftValue;
dst32[3] = dstWord;
src32 += 4;
dst32 += 4;
}
uint32 words = (bytes & 0xF) >> 2; // now get remaining words
// we read one word ahead of what we write
// srcWord already holds *src32 here, so no extra setup read is needed
for (; words ;words--) {
dstWord = srcWord >> shiftValue;
srcWord = src32[1]; // we still go one ahead
src32++;
dstWord |= srcWord << lastShiftValue;
*dst32++ = dstWord;
}
uint32 bytesLeft = bytes & 3; // and remaining bytes
if (bytesLeft) {
// finish with plain byte copies from the true (unrounded) source position
byte *dst8 = (byte *)dst32;
byte *src8 = ((byte *)src32) + ((uint32)src & 0x3); // get exact location we should be at
for(; bytesLeft; bytesLeft--) {
*dst8++ = *src8++;
}
}
}
/**
 * Debug check for a finished copy: compares the two buffers byte by byte
 * and reports the outcome through PSP_INFO_PRINT.
 *
 * On the first difference a header with both pointers and the byte count is
 * printed; every differing position is then listed as
 * "[index]source!=destination". If nothing differs, "ok" is printed.
 *
 * @param debugDst    start of the destination buffer
 * @param debugSrc    start of the source buffer
 * @param debugBytes  number of bytes to compare
 */
void PspMemory::testCopy(const byte *debugDst, const byte *debugSrc, uint32 debugBytes) {
	bool mismatch = false;

	PSP_INFO_PRINT("testing fastCopy...");

	for (uint32 i = 0; i < debugBytes; i++) {
		if (debugDst[i] == debugSrc[i])
			continue;

		if (!mismatch) {
			PSP_INFO_PRINT("**** mismatch in copy! ****\n");
			PSP_INFO_PRINT("dst[%p], src[%p], bytes[%u]\n", debugDst, debugSrc, debugBytes);
			mismatch = true;
		}

		// the index is unsigned, so it is printed with %u like the byte
		// count in the header line
		PSP_INFO_PRINT("[%u]%x!=%x ", i, debugSrc[i], debugDst[i]);
	}

	if (mismatch) {
		PSP_INFO_PRINT("\n");
	} else {
		PSP_INFO_PRINT("ok\n");
	}
}
//
/**
 * Used to swap red and blue: converts a run of 16-bit pixels from src16
 * into dst16 through the given pixel format.
 *
 * If dst16 is not on a 4-byte boundary, one pixel is converted on its own so
 * that the destination becomes word aligned. If fewer than
 * MIN_AMOUNT_FOR_COMPLEX_COPY bytes remain after that, swap16() finishes the
 * job; otherwise the rest goes to swap32Aligned() or swap32Misaligned(),
 * depending on the alignment of src16 at that point.
 *
 * With TEST_MEMORY_COPY defined, the result is checked with testSwap().
 *
 * @param dst16   destination pixels
 * @param src16   source pixels
 * @param bytes   size of the run in bytes (assumed to be more than 4)
 * @param format  pixel format that provides the red/blue swap
 */
void PspMemorySwap::swap(uint16 *dst16, const uint16 *src16, uint32 bytes, PSPPixelFormat &format) {
	DEBUG_ENTER_FUNC();

#ifdef TEST_MEMORY_COPY
	// remember the original request for the verification pass at the end
	const uint32 checkBytes = bytes;
	const uint16 *const checkDst = dst16;
	const uint16 *const checkSrc = src16;
#endif

	bool finished = false;

	// for swap, the destination can only be 0 or 2 bytes past a word boundary
	const uint32 dstOffset = ((uint32)dst16) & 0x3;

	if (dstOffset != 0) {
		// one pixel by hand brings the destination onto a word boundary
		bytes -= dstOffset;
		*dst16 = format.swapRedBlue16(*src16);
		++dst16;
		++src16;

		// too little left for the word-based routines to be worthwhile
		if (bytes < MIN_AMOUNT_FOR_COMPLEX_COPY) {
			swap16(dst16, src16, bytes, format);
			finished = true;
		}
	}

	if (!finished) {
		// pick the word-based routine that matches the source alignment
		if ((((uint32)src16) & 0x3) == 0) {
			swap32Aligned((uint32 *)dst16, (const uint32 *)src16, bytes, format);
		} else {
			PSP_DEBUG_PRINT("misaligned copy of %u bytes from %p to %p\n", bytes, src16, dst16);
			swap32Misaligned((uint32 *)dst16, src16, bytes, format);
		}
	}

#ifdef TEST_MEMORY_COPY
	testSwap(checkDst, checkSrc, checkBytes, format);
#endif
}
/**
 * Debug check for a finished swap: recomputes format.swapRedBlue16() for
 * every 16-bit source value and compares it with what is in the destination.
 *
 * On the first difference a header with both pointers and the byte count is
 * printed; every differing pixel is then listed as
 * "[byte offset]expected!=actual". If nothing differs, "ok" is printed.
 *
 * @param debugDst    start of the destination pixels
 * @param debugSrc    start of the source pixels
 * @param debugBytes  size of the run in bytes; debugBytes / 2 values are compared
 * @param format      pixel format that provides the red/blue swap
 */
void PspMemorySwap::testSwap(const uint16 *debugDst, const uint16 *debugSrc, uint32 debugBytes, PSPPixelFormat &format) {
	bool mismatch = false;

	PSP_INFO_PRINT("testing fastSwap...");

	uint32 shorts = debugBytes >> 1;

	for (uint32 i = 0; i < shorts; i++) {
		if (debugDst[i] == format.swapRedBlue16(debugSrc[i]))
			continue;

		if (!mismatch) {
			PSP_INFO_PRINT("**** mismatch in swap! ****\n");
			PSP_INFO_PRINT("dst[%p], src[%p], bytes[%u]\n", debugDst, debugSrc, debugBytes);
			mismatch = true;
		}

		// i<<1 turns the pixel index into a byte offset; it is unsigned, so
		// it is printed with %u like the byte count in the header line
		PSP_INFO_PRINT("[%u]%x!=%x ", i<<1, format.swapRedBlue16(debugSrc[i]), debugDst[i]);
	}

	if (mismatch) {
		PSP_INFO_PRINT("\n");
	} else {
		PSP_INFO_PRINT("ok\n");
	}
}
/**
 * Red/blue swap for the case where source and destination can both be
 * accessed as 32-bit words.
 *
 * Words are converted with format.swapRedBlue32(), first in groups of four
 * and then one at a time; a trailing partial word is handled as a single
 * 16-bit value with format.swapRedBlue16().
 *
 * @param dst32   destination, written as 32-bit words
 * @param src32   source, read as 32-bit words
 * @param bytes   size of the run in bytes
 * @param format  pixel format that provides the red/blue swap
 */
void PspMemorySwap::swap32Aligned(uint32 *dst32, const uint32 *src32, uint32 bytes, PSPPixelFormat &format) {
	DEBUG_ENTER_FUNC();

	// 16-byte groups: convert four words first, then store all of them
	for (uint32 groups = bytes >> 4; groups != 0; --groups) {
		const uint32 p0 = format.swapRedBlue32(src32[0]);
		const uint32 p1 = format.swapRedBlue32(src32[1]);
		const uint32 p2 = format.swapRedBlue32(src32[2]);
		const uint32 p3 = format.swapRedBlue32(src32[3]);

		dst32[0] = p0;
		dst32[1] = p1;
		dst32[2] = p2;
		dst32[3] = p3;

		src32 += 4;
		dst32 += 4;
	}

	// zero to three whole words are left
	for (uint32 singles = (bytes >> 2) & 0x3; singles != 0; --singles) {
		*dst32 = format.swapRedBlue32(*src32);
		++dst32;
		++src32;
	}

	// for swap, anything left over can only be 1 short
	if ((bytes & 0x3) != 0) {
		uint16 *lastDst = (uint16 *)dst32;
		const uint16 *lastSrc = (const uint16 *)src32;
		*lastDst = format.swapRedBlue16(*lastSrc);
	}
}
// More challenging -- need to shift
// We assume dst is aligned
//
// Red/blue swap for a source that does not sit on a 4-byte boundary. The
// source is read as whole 32-bit words starting at src16 rounded down to a
// word boundary. The shift is fixed at 16 bits, i.e. this handles a source
// lying one 16-bit value past a word boundary: each output word is the upper
// half of the current source word combined with the lower half of the next
// one, and is then passed through format.swapRedBlue32().
//
// dst32  - destination, written as 32-bit words
// src16  - source pixels as given by the caller (not rounded down)
// bytes  - size of the run in bytes
// format - pixel format that provides the red/blue swap
void PspMemorySwap::swap32Misaligned(uint32 *dst32, const uint16 *src16, uint32 bytes, PSPPixelFormat &format) {
DEBUG_ENTER_FUNC();
const uint32 shiftValue = 16;
uint32 *src32 = (uint32 *)(((uint32)src16) & 0xFFFFFFFC); // remove misalignment
// Try to do groups of 4 words
uint32 words4 = bytes >> 4;
uint32 srcWord = src32[0]; // preload
while (words4--) {
// each step: keep the upper half of the current source word, fetch the
// next word, and take its lower half to complete the output word
uint32 dstWord = srcWord >> shiftValue;
srcWord = src32[1];
dstWord |= srcWord << shiftValue;
dst32[0] = format.swapRedBlue32(dstWord);
dstWord = srcWord >> shiftValue;
srcWord = src32[2];
dstWord |= srcWord << shiftValue;
dst32[1] = format.swapRedBlue32(dstWord);
dstWord = srcWord >> shiftValue;
srcWord = src32[3];
dstWord |= srcWord << shiftValue;
dst32[2] = format.swapRedBlue32(dstWord);
dstWord = srcWord >> shiftValue;
// src32[4] becomes src32[0] once the pointer is advanced below, so
// srcWord stays in step with src32 for the next iteration
srcWord = src32[4];
dstWord |= srcWord << shiftValue;
dst32[3] = format.swapRedBlue32(dstWord);
src32 += 4;
dst32 += 4;
}
uint32 words = (bytes & 0xF) >> 2;
// we read one word ahead of what we write
// setup the first read
if (words) {
//srcWord = *src32++; // don't need this. already loaded
src32++; // we already have the value loaded in
while (words--) {
uint32 dstWord = srcWord >> shiftValue;
srcWord = *src32++;
dstWord |= srcWord << shiftValue;
*dst32++ = format.swapRedBlue32(dstWord);
}
}
uint32 bytesLeft = bytes & 3;
if (bytesLeft) { // for swap, can only be 1 short left
// that short is the upper half of the source word read last, so no
// further source read is needed
*((uint16 *)dst32) = format.swapRedBlue16((uint16)(srcWord >> shiftValue));
}
}