mirror of
https://github.com/libretro/scummvm.git
synced 2025-03-05 17:57:14 +00:00
GRAPHICS: ATARI: Align surface on a 16-byte boundary
Also implement a CPU-based optimization for the 68040 / 68060.
This commit is contained in:
parent
d3be1b0c34
commit
06af761337
@ -1001,7 +1001,7 @@ void AtariGraphicsManager::Cursor::setSurface(const void *buf, int w, int h, int
|
||||
if (surface.w != w || surface.h != h || surface.format != format)
|
||||
surface.create(w, h, format);
|
||||
|
||||
surface.copyRectToSurface(buf, surface.pitch, 0, 0, w, h);
|
||||
surface.copyRectToSurface(buf, w * format.bytesPerPixel, 0, 0, w, h);
|
||||
|
||||
hotspotX = _hotspotX;
|
||||
hotspotY = _hotspotY;
|
||||
|
@ -22,15 +22,24 @@
|
||||
#include "graphics/blit.h"
|
||||
#include "graphics/surface.h"
|
||||
|
||||
#include <cstdlib> // calloc
|
||||
#include <cstring> // memcpy
|
||||
#include <cstdlib> // malloc
|
||||
#include <cstring> // memcpy, memset
|
||||
#include <mint/cookie.h>
|
||||
#include <mint/falcon.h>
|
||||
|
||||
#include "backends/graphics/atari/atari-graphics-superblitter.h"
|
||||
#include "common/textconsole.h" // error
|
||||
|
||||
static inline bool hasMove16() {
|
||||
long val;
|
||||
static bool hasMove16 = Getcookie(C__CPU, &val) == C_FOUND && val >= 40;
|
||||
return hasMove16;
|
||||
}
|
||||
|
||||
namespace Graphics {
|
||||
|
||||
constexpr size_t ALIGN = 16; // 16 bytes
|
||||
|
||||
// hijack surface overrides here as well as these are tightly related
|
||||
// to the blitting routine below
|
||||
void Surface::create(int16 width, int16 height, const PixelFormat &f) {
|
||||
@ -40,24 +49,41 @@ void Surface::create(int16 width, int16 height, const PixelFormat &f) {
|
||||
w = width;
|
||||
h = height;
|
||||
format = f;
|
||||
pitch = w * format.bytesPerPixel;
|
||||
// align pitch to a 16-byte boundary for a possible C2P conversion
|
||||
pitch = (w * format.bytesPerPixel + ALIGN - 1) & (-ALIGN);
|
||||
|
||||
if (width && height) {
|
||||
if (VgetMonitor() == MON_VGA && Getcookie(C_SupV, NULL) == C_FOUND)
|
||||
pixels = (void*)ct60_vmalloc(width * height * format.bytesPerPixel);
|
||||
else
|
||||
pixels = calloc(width * height, format.bytesPerPixel);
|
||||
assert(pixels);
|
||||
if (VgetMonitor() == MON_VGA && Getcookie(C_SupV, NULL) == C_FOUND) {
|
||||
pixels = (void *)ct60_vmalloc(height * pitch);
|
||||
|
||||
if (!pixels)
|
||||
error("Not enough SVRAM to allocate a surface");
|
||||
|
||||
assert((uintptr)pixels >= 0xA0000000);
|
||||
} else {
|
||||
// align buffer to a 16-byte boundary for move16 or C2P conversion
|
||||
void *pixelsUnaligned = ::malloc(sizeof(uintptr) + (height * pitch) + ALIGN - 1);
|
||||
|
||||
if (!pixelsUnaligned)
|
||||
error("Not enough memory to allocate a surface");
|
||||
|
||||
pixels = (void *)(((uintptr)pixelsUnaligned + sizeof(uintptr) + ALIGN - 1) & (-ALIGN));
|
||||
|
||||
// store the unaligned pointer for later free()
|
||||
*((uintptr *)pixels - 1) = (uintptr)pixelsUnaligned;
|
||||
}
|
||||
|
||||
memset(pixels, 0, height * pitch);
|
||||
}
|
||||
}
|
||||
|
||||
void Surface::free() {
|
||||
if (((uintptr)pixels & 0xFF000000) >= 0xA0000000)
|
||||
ct60_vmfree(pixels);
|
||||
else
|
||||
::free(pixels);
|
||||
else if (pixels)
|
||||
::free((void *)*((uintptr *)pixels - 1));
|
||||
|
||||
pixels = 0;
|
||||
pixels = nullptr;
|
||||
w = h = pitch = 0;
|
||||
format = PixelFormat();
|
||||
}
|
||||
@ -87,12 +113,136 @@ void copyBlit(byte *dst, const byte *src,
|
||||
// wait until we finish otherwise we may overwrite pixels written manually afterwards
|
||||
while (*SV_BLITTER_CONTROL & 1);
|
||||
} else if (dstPitch == srcPitch && ((w * bytesPerPixel) == dstPitch)) {
|
||||
memcpy(dst, src, dstPitch * h);
|
||||
if (hasMove16() && ((uintptr)src & (ALIGN - 1)) == 0 && ((uintptr)dst & (ALIGN - 1)) == 0) {
|
||||
__asm__ volatile(
|
||||
" move.l %2,d0\n"
|
||||
" lsr.l #4,d0\n"
|
||||
" beq.b 3f\n"
|
||||
|
||||
" moveq #0x0f,d1\n"
|
||||
" and.l d0,d1\n"
|
||||
" neg.l d1\n"
|
||||
" lsr.l #4,d0\n"
|
||||
" jmp (2f,pc,d1.l*4)\n"
|
||||
"1:\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
"2:\n"
|
||||
" dbra d0,1b\n"
|
||||
// handle also the unlikely case when 'dstPitch'
|
||||
// is not divisible by 16 but 'src' and 'dst' are
|
||||
"3:\n"
|
||||
" moveq #0x0f,d0\n"
|
||||
" and.l %2,d0\n"
|
||||
" neg.l d0\n"
|
||||
" jmp (4f,pc,d0.l*2)\n"
|
||||
// only 15x move.b as 16 would be handled above
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
"4:\n"
|
||||
: // outputs
|
||||
: "a"(src), "a"(dst), "g"(dstPitch * h) // inputs
|
||||
: "d0", "d1", "cc" AND_MEMORY
|
||||
);
|
||||
} else {
|
||||
memcpy(dst, src, dstPitch * h);
|
||||
}
|
||||
} else {
|
||||
for (uint i = 0; i < h; ++i) {
|
||||
memcpy(dst, src, w * bytesPerPixel);
|
||||
dst += dstPitch;
|
||||
src += srcPitch;
|
||||
if (hasMove16() && ((uintptr)src & (ALIGN - 1)) == 0 && ((uintptr)dst & (ALIGN - 1)) == 0) {
|
||||
__asm__ volatile(
|
||||
"0:\n"
|
||||
" move.l %2,d0\n"
|
||||
" lsr.l #4,d0\n"
|
||||
" beq.b 3f\n"
|
||||
|
||||
" moveq #0x0f,d1\n"
|
||||
" and.l d0,d1\n"
|
||||
" neg.l d1\n"
|
||||
" lsr.l #4,d0\n"
|
||||
" jmp (2f,pc,d1.l*4)\n"
|
||||
"1:\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
"2:\n"
|
||||
" dbra d0,1b\n"
|
||||
// handle (w * bytesPerPixel) % 16
|
||||
"3:\n"
|
||||
" moveq #0x0f,d0\n"
|
||||
" and.l %2,d0\n"
|
||||
" neg.l d0\n"
|
||||
" jmp (4f,pc,d0.l*2)\n"
|
||||
// only 15x move.b as 16 would be handled above
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
"4:\n"
|
||||
" add.l %4,%1\n"
|
||||
" add.l %5,%0\n"
|
||||
" dbra %3,0b\n"
|
||||
: // outputs
|
||||
: "a"(src), "a"(dst), "g"(w * bytesPerPixel), "d"(h - 1),
|
||||
"g"(dstPitch - w * bytesPerPixel), "g"(srcPitch - w * bytesPerPixel) // inputs
|
||||
: "d0", "d1", "d2", "cc" AND_MEMORY
|
||||
);
|
||||
} else {
|
||||
for (uint i = 0; i < h; ++i) {
|
||||
memcpy(dst, src, w * bytesPerPixel);
|
||||
dst += dstPitch;
|
||||
src += srcPitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user