Added notes on the next steps for SDL 1.3

Moved fill and copy routines to their own files.

--HG--
rename : src/video/SDL_blit_copy.c => src/video/SDL_copy.c
rename : src/video/SDL_blit_copy.h => src/video/SDL_copy.h
extra : convert_revision : svn%3Ac70aab31-4412-0410-b14c-859654838e24/trunk%402615
This commit is contained in:
Sam Lantinga 2007-08-16 21:43:19 +00:00
parent beba16ed23
commit 5ac6d00012
6 changed files with 405 additions and 364 deletions

10
NOTES
View File

@ -157,6 +157,16 @@ Change textures to static/streaming. Static textures are not lockable,
streaming textures are lockable and may have system memory pixels available.
SDL_compat will use a streaming video texture, and will never be HWSURFACE,
but may be PREALLOC, if system memory pixels are available.
*** DONE Thu Aug 16 14:18:42 PDT 2007
The software renderer will be abstracted so the surface management can be
used by any renderer that provides functions to copy surfaces to the window.
Blitters...
----
Copy blit and fill rect are optimized with MMX and SSE now.
Here are the pieces we still need:
- Merging SDL texture capabilities into the SDL surface system
- Generic fallback blitter architecture
- Custom fast path blitters

View File

@ -24,7 +24,7 @@
#include "SDL_video.h"
#include "SDL_sysvideo.h"
#include "SDL_blit.h"
#include "SDL_blit_copy.h"
#include "SDL_copy.h"
#include "SDL_RLEaccel_c.h"
#include "SDL_pixels_c.h"

View File

@ -23,10 +23,38 @@
#include "SDL_video.h"
#include "SDL_blit.h"
#include "SDL_blit_copy.h"
#include "SDL_copy.h"
#ifdef __SSE__
/*
 * Copy 'len' bytes from src to dst in 64-byte blocks using streaming SSE
 * stores.  This assumes 16-byte aligned src and dst.  Whatever is left over
 * after the last full block (len & 63 bytes) is handed to SDL_memcpy().
 */
static __inline__ void
SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
{
    int blocks = len / 64;
    const int tail = len & 63;

    while (blocks--) {
        __m128 v0, v1, v2, v3;

        /* Load one 64-byte block, then stream it out to the destination */
        _mm_prefetch(src, _MM_HINT_NTA);
        v0 = *(__m128 *) (src + 0);
        v1 = *(__m128 *) (src + 16);
        v2 = *(__m128 *) (src + 32);
        v3 = *(__m128 *) (src + 48);
        _mm_stream_ps((float *) (dst + 0), v0);
        _mm_stream_ps((float *) (dst + 16), v1);
        _mm_stream_ps((float *) (dst + 32), v2);
        _mm_stream_ps((float *) (dst + 48), v3);
        src += 64;
        dst += 64;
    }

    if (tail) {
        SDL_memcpy(dst, src, tail);
    }
}
#endif /* __SSE__ */
#ifdef __MMX__
/* This assumes 8-byte aligned src and dst */
static __inline__ void
SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
{
@ -60,32 +88,6 @@ SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
}
#endif /* __MMX__ */
#ifdef __SSE__
/*
 * Copy 'len' bytes from src to dst, 64 bytes at a time, through four
 * 16-byte loads followed by four streaming stores; the trailing
 * len & 63 bytes are copied with SDL_memcpy().
 */
static __inline__ void
SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
{
    int remaining_blocks = len / 64;
    const int leftover = len & 63;

    while (remaining_blocks--) {
        __m128 a, b, c, d;

        _mm_prefetch(src, _MM_HINT_NTA);
        a = *(__m128 *) (src + 0);
        b = *(__m128 *) (src + 16);
        c = *(__m128 *) (src + 32);
        d = *(__m128 *) (src + 48);
        _mm_stream_ps((float *) (dst + 0), a);
        _mm_stream_ps((float *) (dst + 16), b);
        _mm_stream_ps((float *) (dst + 32), c);
        _mm_stream_ps((float *) (dst + 48), d);
        src += 64;
        dst += 64;
    }

    if (leftover) {
        SDL_memcpy(dst, src, leftover);
    }
}
#endif /* __SSE__ */
void
SDL_BlitCopy(SDL_BlitInfo * info)
{

365
src/video/SDL_fill.c Normal file
View File

@ -0,0 +1,365 @@
/*
SDL - Simple DirectMedia Layer
Copyright (C) 1997-2006 Sam Lantinga
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Sam Lantinga
slouken@libsdl.org
*/
#include "SDL_config.h"
#include "SDL_video.h"
#include "SDL_blit.h"
#ifdef __SSE__
/*
 * SSE rectangle fills.
 *
 * DEFINE_SSE_FILLRECT(bpp, type) expands to a static function
 * SDL_FillRect<bpp>SSE(pixels, pitch, color, w, h) that fills h rows of
 * w pixels, each pixel being 'bpp' bytes wide and stored as 'type'.
 * The caller (SDL_FillRect) replicates 8- and 16-bit pixel values across
 * all 32 bits of 'color' before calling these functions, so the 128-bit
 * pattern built from four copies of 'color' is valid for every pixel size.
 */
/* *INDENT-OFF* */
#ifdef _MSC_VER
/* SSE_BEGIN (MSVC): declare c128 and set its four 32-bit lanes to 'color'
   through the m128_u32 member of __m128. */
#define SSE_BEGIN \
__m128 c128; \
c128.m128_u32[0] = color; \
c128.m128_u32[1] = color; \
c128.m128_u32[2] = color; \
c128.m128_u32[3] = color;
#else
/* SSE_BEGIN (other compilers): fill a four-element Uint32 scratch array
   with 'color' and load it as c128.  DECLARE_ALIGNED is defined elsewhere;
   presumably it declares cccc with 16-byte alignment -- TODO confirm. */
#define SSE_BEGIN \
DECLARE_ALIGNED(Uint32, cccc[4], 16); \
cccc[0] = color; \
cccc[1] = color; \
cccc[2] = color; \
cccc[3] = color; \
__m128 c128 = *(__m128 *)cccc;
#endif
/* SSE_WORK: store c128 over n / 64 consecutive 64-byte blocks starting at
   p (four 16-byte streaming stores per block), advancing p past each block.
   Uses i, n, p and c128 from the expanding scope and leaves n unchanged.
   _mm_stream_ps needs a 16-byte aligned address, which is why the fill
   function aligns p first. */
#define SSE_WORK \
for (i = n / 64; i--;) { \
_mm_stream_ps((float *)(p+0), c128); \
_mm_stream_ps((float *)(p+16), c128); \
_mm_stream_ps((float *)(p+32), c128); \
_mm_stream_ps((float *)(p+48), c128); \
p += 64; \
}
/* SSE_END: intentionally empty; no state needs restoring after the fill. */
#define SSE_END
/* Per row, with n = w * bpp bytes to write:
     - if n > 15, write single pixels until p reaches the next 16-byte
       boundary (subtracting those bytes from n), then run SSE_WORK;
     - write the remaining (n & 63) bytes one pixel at a time;
     - advance 'pixels' by 'pitch' to the next row.
   NOTE(review): 'adjust /= bpp' truncates, so this appears to assume the
   row start is already aligned to the pixel size -- confirm with callers. */
#define DEFINE_SSE_FILLRECT(bpp, type) \
static void \
SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
{ \
SSE_BEGIN; \
\
while (h--) { \
int i, n = w * bpp; \
Uint8 *p = pixels; \
\
if (n > 15) { \
int adjust = 16 - ((uintptr_t)p & 15); \
if (adjust < 16) { \
n -= adjust; \
adjust /= bpp; \
while(adjust--) { \
*((type *)p) = (type)color; \
p += bpp; \
} \
} \
SSE_WORK; \
} \
if (n & 63) { \
int remainder = (n & 63); \
remainder /= bpp; \
while(remainder--) { \
*((type *)p) = (type)color; \
p += bpp; \
} \
} \
pixels += pitch; \
} \
\
SSE_END; \
}
/* Instantiate SDL_FillRect1SSE, SDL_FillRect2SSE and SDL_FillRect4SSE. */
DEFINE_SSE_FILLRECT(1, Uint8)
DEFINE_SSE_FILLRECT(2, Uint16)
DEFINE_SSE_FILLRECT(4, Uint32)
/* *INDENT-ON* */
#endif /* __SSE__ */
#ifdef __MMX__
/*
 * MMX rectangle fills.
 *
 * DEFINE_MMX_FILLRECT(bpp, type) expands to a static function
 * SDL_FillRect<bpp>MMX(pixels, pitch, color, w, h) that fills h rows of
 * w pixels, each pixel being 'bpp' bytes wide and stored as 'type'.
 * The caller (SDL_FillRect) replicates 8- and 16-bit pixel values across
 * all 32 bits of 'color' before calling these functions.
 */
/* *INDENT-OFF* */
/* MMX_BEGIN: declare c64, a 64-bit value holding two copies of 'color'. */
#define MMX_BEGIN \
__m64 c64 = _mm_set_pi32(color, color)
/* MMX_WORK: store c64 over n / 64 consecutive 64-byte blocks starting at p
   (eight 8-byte streaming stores per block), advancing p past each block.
   Uses i, n, p and c64 from the expanding scope and leaves n unchanged.
   NOTE(review): Intel documents _mm_stream_pi (MOVNTQ) as an SSE-era
   instruction, while the caller only checks SDL_HasMMX() -- confirm that
   MMX-only CPUs can execute this path. */
#define MMX_WORK \
for (i = n / 64; i--;) { \
_mm_stream_pi((__m64 *)(p+0), c64); \
_mm_stream_pi((__m64 *)(p+8), c64); \
_mm_stream_pi((__m64 *)(p+16), c64); \
_mm_stream_pi((__m64 *)(p+24), c64); \
_mm_stream_pi((__m64 *)(p+32), c64); \
_mm_stream_pi((__m64 *)(p+40), c64); \
_mm_stream_pi((__m64 *)(p+48), c64); \
_mm_stream_pi((__m64 *)(p+56), c64); \
p += 64; \
}
/* MMX_END: _mm_empty() clears the MMX state once the fill is finished so
   that later floating-point code can use the shared registers. */
#define MMX_END \
_mm_empty()
/* Per row, with n = w * bpp bytes to write:
     - if n > 7, write single pixels until p reaches the next 8-byte
       boundary (subtracting those bytes from n), then run MMX_WORK;
     - write the remaining (n & 63) bytes one pixel at a time;
     - advance 'pixels' by 'pitch' to the next row.
   NOTE(review): 'adjust /= bpp' truncates, so this appears to assume the
   row start is already aligned to the pixel size -- confirm with callers. */
#define DEFINE_MMX_FILLRECT(bpp, type) \
static void \
SDL_FillRect##bpp##MMX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
{ \
MMX_BEGIN; \
\
while (h--) { \
int i, n = w * bpp; \
Uint8 *p = pixels; \
\
if (n > 7) { \
int adjust = 8 - ((uintptr_t)p & 7); \
if (adjust < 8) { \
n -= adjust; \
adjust /= bpp; \
while(adjust--) { \
*((type *)p) = (type)color; \
p += bpp; \
} \
} \
MMX_WORK; \
} \
if (n & 63) { \
int remainder = (n & 63); \
remainder /= bpp; \
while(remainder--) { \
*((type *)p) = (type)color; \
p += bpp; \
} \
} \
pixels += pitch; \
} \
\
MMX_END; \
}
/* Instantiate SDL_FillRect1MMX, SDL_FillRect2MMX and SDL_FillRect4MMX. */
DEFINE_MMX_FILLRECT(1, Uint8)
DEFINE_MMX_FILLRECT(2, Uint16)
DEFINE_MMX_FILLRECT(4, Uint32)
/* *INDENT-ON* */
#endif /* __MMX__ */
/*
 * Fill h rows of w 8-bit pixels.  The low byte of 'color' is the pixel
 * value; the full 32-bit 'color' is used as the pattern for the
 * word-sized middle part of each row.
 */
static void
SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    const Uint8 value = (Uint8) color;

    for (; h--; pixels += pitch) {
        Uint8 *cursor = pixels;
        int count = w;
        int tail;

        if (count > 3) {
            /* Single bytes up to the next 4-byte boundary (at most 3) */
            while (((uintptr_t) cursor & 3) != 0) {
                *cursor++ = value;
                --count;
            }
            /* Whole 32-bit words */
            SDL_memset4(cursor, color, (count >> 2));
        }

        /* Bytes left over after the last whole word */
        tail = count & 3;
        if (tail) {
            cursor += (count & ~3);
            while (tail--) {
                *cursor++ = value;
            }
        }
    }
}
/*
 * Fill h rows of w 16-bit pixels.  The low 16 bits of 'color' are the
 * pixel value; the full 32-bit 'color' is used as the pattern for the
 * pairs of pixels written with SDL_memset4().
 */
static void
SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    const Uint16 value = (Uint16) color;

    for (; h--; pixels += pitch) {
        Uint16 *cursor = (Uint16 *) pixels;
        int count = w;

        if (count >= 2) {
            /* One leading pixel if the row does not start on a 4-byte boundary */
            if (((uintptr_t) cursor & 2) != 0) {
                *cursor++ = value;
                count -= 1;
            }
            /* Pixel pairs, written as 32-bit words */
            SDL_memset4(cursor, color, (count >> 1));
        }

        /* Odd pixel at the end of the row */
        if ((count & 1) != 0) {
            cursor[count - 1] = value;
        }
    }
}
/*
 * Fill h rows of w 24-bit pixels with the low 24 bits of 'color'.
 *
 * A 24-bit pixel occupies three consecutive bytes whose order in memory
 * follows the host byte order (least significant byte first on
 * little-endian machines, most significant byte first on big-endian
 * ones), so the byte sequence is chosen with SDL_BYTEORDER rather than
 * being hard-coded.  SDL_BYTEORDER and SDL_LIL_ENDIAN come from
 * SDL_endian.h.
 */
static void
SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    /* b1, b2, b3 are the bytes in the order they are written to memory */
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
    const Uint8 b1 = (Uint8) (color & 0xFF);
    const Uint8 b2 = (Uint8) ((color >> 8) & 0xFF);
    const Uint8 b3 = (Uint8) ((color >> 16) & 0xFF);
#else
    const Uint8 b1 = (Uint8) ((color >> 16) & 0xFF);
    const Uint8 b2 = (Uint8) ((color >> 8) & 0xFF);
    const Uint8 b3 = (Uint8) (color & 0xFF);
#endif

    while (h--) {
        int n = w;
        Uint8 *p = pixels;

        while (n--) {
            *p++ = b1;
            *p++ = b2;
            *p++ = b3;
        }
        pixels += pitch;
    }
}
/*
 * Fill h rows of w 32-bit pixels with 'color', one SDL_memset4() per row.
 */
static void
SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    Uint8 *row = pixels;

    for (; h--; row += pitch) {
        SDL_memset4(row, color, w);
    }
}
/*
 * This function performs a fast fill of the given rectangle with 'color'
 *
 * dst      Surface to fill.  Its pixels must be accessible (dst->pixels
 *          non-NULL); this function does not lock or unlock the surface,
 *          so a caller that locked it still holds that lock on return.
 * dstrect  Rectangle to fill, or NULL to fill the surface's clip
 *          rectangle.  A non-NULL rectangle is clipped against
 *          dst->clip_rect and the clipped result is written back to it.
 * color    Pixel value in the surface's format.
 *
 * Returns 0 on success (including when the clipped rectangle is empty),
 * or -1 with the SDL error set when the surface is NULL, has fewer than
 * 8 bits per pixel, or has no accessible pixels.
 */
int
SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color)
{
    Uint8 *pixels;

    if (!dst) {
        SDL_SetError("SDL_FillRect(): Passed NULL destination surface");
        return (-1);
    }

    /* This function doesn't work on surfaces < 8 bpp */
    if (dst->format->BitsPerPixel < 8) {
        SDL_SetError("SDL_FillRect(): Unsupported surface format");
        return (-1);
    }

    /* If 'dstrect' == NULL, then fill the whole surface */
    if (dstrect) {
        /* Perform clipping */
        if (!SDL_IntersectRect(dstrect, &dst->clip_rect, dstrect)) {
            return (0);
        }
    } else {
        dstrect = &dst->clip_rect;
    }

    /* Perform software fill */
    if (!dst->pixels) {
        SDL_SetError("SDL_FillRect(): You must lock the surface");
        return (-1);
    }

    /* Address of the top-left pixel of the rectangle */
    pixels =
        (Uint8 *) dst->pixels + dstrect->y * dst->pitch +
        dstrect->x * dst->format->BytesPerPixel;

    switch (dst->format->BytesPerPixel) {
    case 1:
        {
            /* Replicate the byte so every 32-bit lane holds the pixel */
            color |= (color << 8);
            color |= (color << 16);
#ifdef __SSE__
            if (SDL_HasSSE()) {
                SDL_FillRect1SSE(pixels, dst->pitch, color, dstrect->w,
                                 dstrect->h);
                break;
            }
#endif
#ifdef __MMX__
            if (SDL_HasMMX()) {
                SDL_FillRect1MMX(pixels, dst->pitch, color, dstrect->w,
                                 dstrect->h);
                break;
            }
#endif
            SDL_FillRect1(pixels, dst->pitch, color, dstrect->w, dstrect->h);
            break;
        }

    case 2:
        {
            /* Replicate the 16-bit pixel into both halves of the word */
            color |= (color << 16);
#ifdef __SSE__
            if (SDL_HasSSE()) {
                SDL_FillRect2SSE(pixels, dst->pitch, color, dstrect->w,
                                 dstrect->h);
                break;
            }
#endif
#ifdef __MMX__
            if (SDL_HasMMX()) {
                SDL_FillRect2MMX(pixels, dst->pitch, color, dstrect->w,
                                 dstrect->h);
                break;
            }
#endif
            SDL_FillRect2(pixels, dst->pitch, color, dstrect->w, dstrect->h);
            break;
        }

    case 3:
        /* 24-bit RGB is a slow path, at least for now. */
        {
            SDL_FillRect3(pixels, dst->pitch, color, dstrect->w, dstrect->h);
            break;
        }

    case 4:
        {
#ifdef __SSE__
            if (SDL_HasSSE()) {
                SDL_FillRect4SSE(pixels, dst->pitch, color, dstrect->w,
                                 dstrect->h);
                break;
            }
#endif
#ifdef __MMX__
            if (SDL_HasMMX()) {
                SDL_FillRect4MMX(pixels, dst->pitch, color, dstrect->w,
                                 dstrect->h);
                break;
            }
#endif
            SDL_FillRect4(pixels, dst->pitch, color, dstrect->w, dstrect->h);
            break;
        }
    }

    /*
     * No SDL_UnlockSurface() here: this function never took a lock, so
     * unlocking would release one that belongs to the caller.
     */

    /* We're done! */
    return (0);
}
/* vi: set ts=4 sw=4 expandtab: */

View File

@ -509,342 +509,6 @@ SDL_UpperBlit(SDL_Surface * src, SDL_Rect * srcrect,
return 0;
}
#ifdef __SSE__
/*
 * SSE rectangle fills.
 *
 * DEFINE_SSE_FILLRECT(bpp, type) expands to a static function
 * SDL_FillRect<bpp>SSE(pixels, pitch, color, w, h) that fills h rows of
 * w pixels, each pixel being 'bpp' bytes wide and stored as 'type'.
 * The caller (SDL_FillRect) replicates 8- and 16-bit pixel values across
 * all 32 bits of 'color' before calling these functions, so the 128-bit
 * pattern built from four copies of 'color' is valid for every pixel size.
 */
/* *INDENT-OFF* */
#ifdef _MSC_VER
/* SSE_BEGIN (MSVC): declare c128 and set its four 32-bit lanes to 'color'
   through the m128_u32 member of __m128. */
#define SSE_BEGIN \
__m128 c128; \
c128.m128_u32[0] = color; \
c128.m128_u32[1] = color; \
c128.m128_u32[2] = color; \
c128.m128_u32[3] = color;
#else
/* SSE_BEGIN (other compilers): fill a four-element Uint32 scratch array
   with 'color' and load it as c128.  DECLARE_ALIGNED is defined elsewhere;
   presumably it declares cccc with 16-byte alignment -- TODO confirm. */
#define SSE_BEGIN \
DECLARE_ALIGNED(Uint32, cccc[4], 16); \
cccc[0] = color; \
cccc[1] = color; \
cccc[2] = color; \
cccc[3] = color; \
__m128 c128 = *(__m128 *)cccc;
#endif
/* SSE_WORK: store c128 over n / 64 consecutive 64-byte blocks starting at
   p (four 16-byte streaming stores per block), advancing p past each block.
   Uses i, n, p and c128 from the expanding scope and leaves n unchanged.
   _mm_stream_ps needs a 16-byte aligned address, which is why the fill
   function aligns p first. */
#define SSE_WORK \
for (i = n / 64; i--;) { \
_mm_stream_ps((float *)(p+0), c128); \
_mm_stream_ps((float *)(p+16), c128); \
_mm_stream_ps((float *)(p+32), c128); \
_mm_stream_ps((float *)(p+48), c128); \
p += 64; \
}
/* SSE_END: intentionally empty; no state needs restoring after the fill. */
#define SSE_END
/* Per row, with n = w * bpp bytes to write:
     - if n > 15, write single pixels until p reaches the next 16-byte
       boundary (subtracting those bytes from n), then run SSE_WORK;
     - write the remaining (n & 63) bytes one pixel at a time;
     - advance 'pixels' by 'pitch' to the next row.
   NOTE(review): 'adjust /= bpp' truncates, so this appears to assume the
   row start is already aligned to the pixel size -- confirm with callers. */
#define DEFINE_SSE_FILLRECT(bpp, type) \
static void \
SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
{ \
SSE_BEGIN; \
\
while (h--) { \
int i, n = w * bpp; \
Uint8 *p = pixels; \
\
if (n > 15) { \
int adjust = 16 - ((uintptr_t)p & 15); \
if (adjust < 16) { \
n -= adjust; \
adjust /= bpp; \
while(adjust--) { \
*((type *)p) = (type)color; \
p += bpp; \
} \
} \
SSE_WORK; \
} \
if (n & 63) { \
int remainder = (n & 63); \
remainder /= bpp; \
while(remainder--) { \
*((type *)p) = (type)color; \
p += bpp; \
} \
} \
pixels += pitch; \
} \
\
SSE_END; \
}
/* Instantiate SDL_FillRect1SSE, SDL_FillRect2SSE and SDL_FillRect4SSE. */
DEFINE_SSE_FILLRECT(1, Uint8)
DEFINE_SSE_FILLRECT(2, Uint16)
DEFINE_SSE_FILLRECT(4, Uint32)
/* *INDENT-ON* */
#endif /* __SSE__ */
#ifdef __MMX__
/*
 * MMX rectangle fills.
 *
 * DEFINE_MMX_FILLRECT(bpp, type) expands to a static function
 * SDL_FillRect<bpp>MMX(pixels, pitch, color, w, h) that fills h rows of
 * w pixels, each pixel being 'bpp' bytes wide and stored as 'type'.
 * The caller (SDL_FillRect) replicates 8- and 16-bit pixel values across
 * all 32 bits of 'color' before calling these functions.
 */
/* *INDENT-OFF* */
/* MMX_BEGIN: declare c64, a 64-bit value holding two copies of 'color'. */
#define MMX_BEGIN \
__m64 c64 = _mm_set_pi32(color, color)
/* MMX_WORK: store c64 over n / 64 consecutive 64-byte blocks starting at p
   (eight 8-byte streaming stores per block), advancing p past each block.
   Uses i, n, p and c64 from the expanding scope and leaves n unchanged.
   NOTE(review): Intel documents _mm_stream_pi (MOVNTQ) as an SSE-era
   instruction, while the caller only checks SDL_HasMMX() -- confirm that
   MMX-only CPUs can execute this path. */
#define MMX_WORK \
for (i = n / 64; i--;) { \
_mm_stream_pi((__m64 *)(p+0), c64); \
_mm_stream_pi((__m64 *)(p+8), c64); \
_mm_stream_pi((__m64 *)(p+16), c64); \
_mm_stream_pi((__m64 *)(p+24), c64); \
_mm_stream_pi((__m64 *)(p+32), c64); \
_mm_stream_pi((__m64 *)(p+40), c64); \
_mm_stream_pi((__m64 *)(p+48), c64); \
_mm_stream_pi((__m64 *)(p+56), c64); \
p += 64; \
}
/* MMX_END: _mm_empty() clears the MMX state once the fill is finished so
   that later floating-point code can use the shared registers. */
#define MMX_END \
_mm_empty()
/* Per row, with n = w * bpp bytes to write:
     - if n > 7, write single pixels until p reaches the next 8-byte
       boundary (subtracting those bytes from n), then run MMX_WORK;
     - write the remaining (n & 63) bytes one pixel at a time;
     - advance 'pixels' by 'pitch' to the next row.
   NOTE(review): 'adjust /= bpp' truncates, so this appears to assume the
   row start is already aligned to the pixel size -- confirm with callers. */
#define DEFINE_MMX_FILLRECT(bpp, type) \
static void \
SDL_FillRect##bpp##MMX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
{ \
MMX_BEGIN; \
\
while (h--) { \
int i, n = w * bpp; \
Uint8 *p = pixels; \
\
if (n > 7) { \
int adjust = 8 - ((uintptr_t)p & 7); \
if (adjust < 8) { \
n -= adjust; \
adjust /= bpp; \
while(adjust--) { \
*((type *)p) = (type)color; \
p += bpp; \
} \
} \
MMX_WORK; \
} \
if (n & 63) { \
int remainder = (n & 63); \
remainder /= bpp; \
while(remainder--) { \
*((type *)p) = (type)color; \
p += bpp; \
} \
} \
pixels += pitch; \
} \
\
MMX_END; \
}
/* Instantiate SDL_FillRect1MMX, SDL_FillRect2MMX and SDL_FillRect4MMX. */
DEFINE_MMX_FILLRECT(1, Uint8)
DEFINE_MMX_FILLRECT(2, Uint16)
DEFINE_MMX_FILLRECT(4, Uint32)
/* *INDENT-ON* */
#endif /* __MMX__ */
/*
 * Fill h rows of w 8-bit pixels.  The low byte of 'color' is the pixel
 * value; the full 32-bit 'color' is used as the pattern for the
 * word-sized middle part of each row.
 */
static void
SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    const Uint8 value = (Uint8) color;

    for (; h--; pixels += pitch) {
        Uint8 *cursor = pixels;
        int count = w;
        int tail;

        if (count > 3) {
            /* Single bytes up to the next 4-byte boundary (at most 3) */
            while (((uintptr_t) cursor & 3) != 0) {
                *cursor++ = value;
                --count;
            }
            /* Whole 32-bit words */
            SDL_memset4(cursor, color, (count >> 2));
        }

        /* Bytes left over after the last whole word */
        tail = count & 3;
        if (tail) {
            cursor += (count & ~3);
            while (tail--) {
                *cursor++ = value;
            }
        }
    }
}
/*
 * Fill h rows of w 16-bit pixels.  The low 16 bits of 'color' are the
 * pixel value; the full 32-bit 'color' is used as the pattern for the
 * pairs of pixels written with SDL_memset4().
 */
static void
SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    const Uint16 value = (Uint16) color;

    for (; h--; pixels += pitch) {
        Uint16 *cursor = (Uint16 *) pixels;
        int count = w;

        if (count >= 2) {
            /* One leading pixel if the row does not start on a 4-byte boundary */
            if (((uintptr_t) cursor & 2) != 0) {
                *cursor++ = value;
                count -= 1;
            }
            /* Pixel pairs, written as 32-bit words */
            SDL_memset4(cursor, color, (count >> 1));
        }

        /* Odd pixel at the end of the row */
        if ((count & 1) != 0) {
            cursor[count - 1] = value;
        }
    }
}
/*
 * Fill h rows of w 24-bit pixels with the low 24 bits of 'color'.
 *
 * A 24-bit pixel occupies three consecutive bytes whose order in memory
 * follows the host byte order (least significant byte first on
 * little-endian machines, most significant byte first on big-endian
 * ones), so the byte sequence is chosen with SDL_BYTEORDER rather than
 * always writing the low byte first.  SDL_BYTEORDER and SDL_LIL_ENDIAN
 * come from SDL_endian.h.
 */
static void
SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    /* b1, b2, b3 are the bytes in the order they are written to memory */
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
    const Uint8 b1 = (Uint8) (color & 0xFF);
    const Uint8 b2 = (Uint8) ((color >> 8) & 0xFF);
    const Uint8 b3 = (Uint8) ((color >> 16) & 0xFF);
#else
    const Uint8 b1 = (Uint8) ((color >> 16) & 0xFF);
    const Uint8 b2 = (Uint8) ((color >> 8) & 0xFF);
    const Uint8 b3 = (Uint8) (color & 0xFF);
#endif

    while (h--) {
        int n = w;
        Uint8 *p = pixels;

        while (n--) {
            *p++ = b1;
            *p++ = b2;
            *p++ = b3;
        }
        pixels += pitch;
    }
}
/*
 * Fill h rows of w 32-bit pixels with 'color', one SDL_memset4() per row.
 */
static void
SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    Uint8 *row = pixels;

    for (; h--; row += pitch) {
        SDL_memset4(row, color, w);
    }
}
/*
 * Fast fill of a rectangle on 'dst' with the pixel value 'color'.
 *
 * A NULL 'dstrect' selects the surface's clip rectangle; otherwise
 * 'dstrect' is clipped against dst->clip_rect and the clipped result is
 * written back to it.  The surface is locked for the duration of the fill
 * and unlocked again before returning.
 *
 * Returns 0 on success (including an empty clipped rectangle), or -1 when
 * the surface has fewer than 8 bits per pixel or cannot be locked.
 */
int
SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color)
{
    Uint8 *start;

    /* Surfaces below 8 bpp are not handled here */
    if (dst->format->BitsPerPixel < 8) {
        SDL_SetError("Fill rect on unsupported surface format");
        return (-1);
    }

    if (!dstrect) {
        /* No rectangle given: fill the whole clip area */
        dstrect = &dst->clip_rect;
    } else if (!SDL_IntersectRect(dstrect, &dst->clip_rect, dstrect)) {
        /* Nothing left after clipping */
        return (0);
    }

    /* Software fill needs direct access to the pixels */
    if (SDL_LockSurface(dst) != 0) {
        return (-1);
    }

    /* Address of the top-left pixel of the rectangle */
    start = (Uint8 *) dst->pixels + dstrect->y * dst->pitch +
        dstrect->x * dst->format->BytesPerPixel;

    switch (dst->format->BytesPerPixel) {
    case 1:
        /* Replicate the byte so every 32-bit lane holds the pixel */
        color |= (color << 8);
        color |= (color << 16);
#ifdef __SSE__
        if (SDL_HasSSE()) {
            SDL_FillRect1SSE(start, dst->pitch, color, dstrect->w,
                             dstrect->h);
            break;
        }
#endif
#ifdef __MMX__
        if (SDL_HasMMX()) {
            SDL_FillRect1MMX(start, dst->pitch, color, dstrect->w,
                             dstrect->h);
            break;
        }
#endif
        SDL_FillRect1(start, dst->pitch, color, dstrect->w, dstrect->h);
        break;

    case 2:
        /* Replicate the 16-bit pixel into both halves of the word */
        color |= (color << 16);
#ifdef __SSE__
        if (SDL_HasSSE()) {
            SDL_FillRect2SSE(start, dst->pitch, color, dstrect->w,
                             dstrect->h);
            break;
        }
#endif
#ifdef __MMX__
        if (SDL_HasMMX()) {
            SDL_FillRect2MMX(start, dst->pitch, color, dstrect->w,
                             dstrect->h);
            break;
        }
#endif
        SDL_FillRect2(start, dst->pitch, color, dstrect->w, dstrect->h);
        break;

    case 3:
        /* 24-bit RGB is a slow path, at least for now. */
        SDL_FillRect3(start, dst->pitch, color, dstrect->w, dstrect->h);
        break;

    case 4:
#ifdef __SSE__
        if (SDL_HasSSE()) {
            SDL_FillRect4SSE(start, dst->pitch, color, dstrect->w,
                             dstrect->h);
            break;
        }
#endif
#ifdef __MMX__
        if (SDL_HasMMX()) {
            SDL_FillRect4MMX(start, dst->pitch, color, dstrect->w,
                             dstrect->h);
            break;
        }
#endif
        SDL_FillRect4(start, dst->pitch, color, dstrect->w, dstrect->h);
        break;
    }

    SDL_UnlockSurface(dst);

    /* We're done! */
    return (0);
}
/*
* Lock a surface to directly access the pixels
*/