mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2024-11-26 04:50:25 +00:00
runtime cpu detection
Originally committed as revision 3144 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
This commit is contained in:
parent
48a05ceccd
commit
43d8c23cbf
@ -6,10 +6,6 @@ LIBNAME = libpostproc.a
|
|||||||
SRCS=postprocess.c swscale.c rgb2rgb.c yuv2rgb.c
|
SRCS=postprocess.c swscale.c rgb2rgb.c yuv2rgb.c
|
||||||
OBJS=$(SRCS:.c=.o)
|
OBJS=$(SRCS:.c=.o)
|
||||||
|
|
||||||
ifeq ($(TARGET_ARCH_X86),yes)
|
|
||||||
SRCS += yuv2rgb_mmx.c
|
|
||||||
endif
|
|
||||||
|
|
||||||
CFLAGS = $(OPTFLAGS) $(MLIB_INC) -I. -I.. -Wall $(EXTRA_INC)
|
CFLAGS = $(OPTFLAGS) $(MLIB_INC) -I. -I.. -Wall $(EXTRA_INC)
|
||||||
# -I/usr/X11R6/include/
|
# -I/usr/X11R6/include/
|
||||||
|
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
* along with GNU Make; see the file COPYING. If not, write to
|
* along with GNU Make; see the file COPYING. If not, write to
|
||||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
*
|
*
|
||||||
|
* MMX/MMX2 Template stuff from Michael Niedermayer (michaelni@gmx.at) (needed for fast movntq support)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
@ -34,12 +35,77 @@
|
|||||||
#include "config.h"
|
#include "config.h"
|
||||||
//#include "video_out.h"
|
//#include "video_out.h"
|
||||||
#include "rgb2rgb.h"
|
#include "rgb2rgb.h"
|
||||||
|
#include "../cpudetect.h"
|
||||||
|
|
||||||
#ifdef HAVE_MLIB
|
#ifdef HAVE_MLIB
|
||||||
#include "yuv2rgb_mlib.c"
|
#include "yuv2rgb_mlib.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern yuv2rgb_fun yuv2rgb_init_mmx (int bpp, int mode);
|
#define DITHER1XBPP // only for mmx
|
||||||
|
|
||||||
|
#ifdef ARCH_X86
|
||||||
|
#define CAN_COMPILE_X86_ASM
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CAN_COMPILE_X86_ASM
|
||||||
|
|
||||||
|
/* hope these constant values are cache line aligned */
|
||||||
|
uint64_t __attribute__((aligned(8))) mmx_80w = 0x0080008000800080;
|
||||||
|
uint64_t __attribute__((aligned(8))) mmx_10w = 0x1010101010101010;
|
||||||
|
uint64_t __attribute__((aligned(8))) mmx_00ffw = 0x00ff00ff00ff00ff;
|
||||||
|
uint64_t __attribute__((aligned(8))) mmx_Y_coeff = 0x253f253f253f253f;
|
||||||
|
|
||||||
|
/* hope these constant values are cache line aligned */
|
||||||
|
uint64_t __attribute__((aligned(8))) mmx_U_green = 0xf37df37df37df37d;
|
||||||
|
uint64_t __attribute__((aligned(8))) mmx_U_blue = 0x4093409340934093;
|
||||||
|
uint64_t __attribute__((aligned(8))) mmx_V_red = 0x3312331233123312;
|
||||||
|
uint64_t __attribute__((aligned(8))) mmx_V_green = 0xe5fce5fce5fce5fc;
|
||||||
|
|
||||||
|
/* hope these constant values are cache line aligned */
|
||||||
|
uint64_t __attribute__((aligned(8))) mmx_redmask = 0xf8f8f8f8f8f8f8f8;
|
||||||
|
uint64_t __attribute__((aligned(8))) mmx_grnmask = 0xfcfcfcfcfcfcfcfc;
|
||||||
|
|
||||||
|
uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL;
|
||||||
|
uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL;
|
||||||
|
uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL;
|
||||||
|
|
||||||
|
// the volatile is required because gcc otherwise optimizes some writes away not knowing that these
|
||||||
|
// are read in the asm block
|
||||||
|
volatile uint64_t __attribute__((aligned(8))) b5Dither;
|
||||||
|
volatile uint64_t __attribute__((aligned(8))) g5Dither;
|
||||||
|
volatile uint64_t __attribute__((aligned(8))) g6Dither;
|
||||||
|
volatile uint64_t __attribute__((aligned(8))) r5Dither;
|
||||||
|
|
||||||
|
uint64_t __attribute__((aligned(8))) dither4[2]={
|
||||||
|
0x0103010301030103LL,
|
||||||
|
0x0200020002000200LL,};
|
||||||
|
|
||||||
|
uint64_t __attribute__((aligned(8))) dither8[2]={
|
||||||
|
0x0602060206020602LL,
|
||||||
|
0x0004000400040004LL,};
|
||||||
|
|
||||||
|
#undef HAVE_MMX
|
||||||
|
#undef ARCH_X86
|
||||||
|
|
||||||
|
//MMX versions
|
||||||
|
#undef RENAME
|
||||||
|
#define HAVE_MMX
|
||||||
|
#undef HAVE_MMX2
|
||||||
|
#undef HAVE_3DNOW
|
||||||
|
#define ARCH_X86
|
||||||
|
#define RENAME(a) a ## _MMX
|
||||||
|
#include "yuv2rgb_template.c"
|
||||||
|
|
||||||
|
//MMX2 versions
|
||||||
|
#undef RENAME
|
||||||
|
#define HAVE_MMX
|
||||||
|
#define HAVE_MMX2
|
||||||
|
#undef HAVE_3DNOW
|
||||||
|
#define ARCH_X86
|
||||||
|
#define RENAME(a) a ## _MMX2
|
||||||
|
#include "yuv2rgb_template.c"
|
||||||
|
|
||||||
|
#endif // CAN_COMPILE_X86_ASM
|
||||||
|
|
||||||
|
|
||||||
uint32_t matrix_coefficients = 6;
|
uint32_t matrix_coefficients = 6;
|
||||||
@ -63,10 +129,10 @@ static void (* yuv2rgb_c_internal) (uint8_t *, uint8_t *,
|
|||||||
uint8_t *, uint8_t *,
|
uint8_t *, uint8_t *,
|
||||||
void *, void *, int);
|
void *, void *, int);
|
||||||
|
|
||||||
static void yuv2rgb_c (void * dst, uint8_t * py,
|
static void yuv2rgb_c (void * dst, uint8_t * py,
|
||||||
uint8_t * pu, uint8_t * pv,
|
uint8_t * pu, uint8_t * pv,
|
||||||
int h_size, int v_size,
|
int h_size, int v_size,
|
||||||
int rgb_stride, int y_stride, int uv_stride)
|
int rgb_stride, int y_stride, int uv_stride)
|
||||||
{
|
{
|
||||||
v_size >>= 1;
|
v_size >>= 1;
|
||||||
|
|
||||||
@ -81,16 +147,29 @@ static void yuv2rgb_c (void * dst, uint8_t * py,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void yuv2rgb_init (int bpp, int mode)
|
void yuv2rgb_init (int bpp, int mode)
|
||||||
{
|
{
|
||||||
yuv2rgb = NULL;
|
yuv2rgb = NULL;
|
||||||
#ifdef HAVE_MMX
|
#ifdef CAN_COMPILE_X86_ASM
|
||||||
if (yuv2rgb == NULL /*&& (config.flags & VO_MMX_ENABLE)*/) {
|
if(gCpuCaps.hasMMX2)
|
||||||
yuv2rgb = yuv2rgb_init_mmx (bpp, mode);
|
{
|
||||||
if (yuv2rgb != NULL)
|
if (yuv2rgb == NULL /*&& (config.flags & VO_MMX_ENABLE)*/) {
|
||||||
printf ("Using MMX for colorspace transform\n");
|
yuv2rgb = yuv2rgb_init_MMX2 (bpp, mode);
|
||||||
else
|
if (yuv2rgb != NULL)
|
||||||
printf ("Cannot init MMX colorspace transform\n");
|
printf ("Using MMX2 for colorspace transform\n");
|
||||||
|
else
|
||||||
|
printf ("Cannot init MMX2 colorspace transform\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if(gCpuCaps.hasMMX)
|
||||||
|
{
|
||||||
|
if (yuv2rgb == NULL /*&& (config.flags & VO_MMX_ENABLE)*/) {
|
||||||
|
yuv2rgb = yuv2rgb_init_MMX (bpp, mode);
|
||||||
|
if (yuv2rgb != NULL)
|
||||||
|
printf ("Using MMX for colorspace transform\n");
|
||||||
|
else
|
||||||
|
printf ("Cannot init MMX colorspace transform\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAVE_MLIB
|
#ifdef HAVE_MLIB
|
||||||
|
@ -24,58 +24,27 @@
|
|||||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
*
|
*
|
||||||
* 15,24 bpp and dithering from Michael Niedermayer (michaelni@gmx.at)
|
* 15,24 bpp and dithering from Michael Niedermayer (michaelni@gmx.at)
|
||||||
|
* MMX/MMX2 Template stuff from Michael Niedermayer (needed for fast movntq support)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdio.h>
|
#undef MOVNTQ
|
||||||
#include <stdlib.h>
|
#undef EMMS
|
||||||
|
#undef SFENCE
|
||||||
#include "../config.h"
|
|
||||||
|
|
||||||
//#include "libmpeg2/mpeg2.h"
|
|
||||||
//#include "libmpeg2/mpeg2_internal.h"
|
|
||||||
#include <inttypes.h>
|
|
||||||
|
|
||||||
#include "rgb2rgb.h"
|
|
||||||
#include "../mmx_defs.h"
|
|
||||||
|
|
||||||
#define DITHER1XBPP
|
|
||||||
|
|
||||||
/* hope these constant values are cache line aligned */
|
|
||||||
uint64_t __attribute__((aligned(8))) mmx_80w = 0x0080008000800080;
|
|
||||||
uint64_t __attribute__((aligned(8))) mmx_10w = 0x1010101010101010;
|
|
||||||
uint64_t __attribute__((aligned(8))) mmx_00ffw = 0x00ff00ff00ff00ff;
|
|
||||||
uint64_t __attribute__((aligned(8))) mmx_Y_coeff = 0x253f253f253f253f;
|
|
||||||
|
|
||||||
/* hope these constant values are cache line aligned */
|
|
||||||
uint64_t __attribute__((aligned(8))) mmx_U_green = 0xf37df37df37df37d;
|
|
||||||
uint64_t __attribute__((aligned(8))) mmx_U_blue = 0x4093409340934093;
|
|
||||||
uint64_t __attribute__((aligned(8))) mmx_V_red = 0x3312331233123312;
|
|
||||||
uint64_t __attribute__((aligned(8))) mmx_V_green = 0xe5fce5fce5fce5fc;
|
|
||||||
|
|
||||||
/* hope these constant values are cache line aligned */
|
|
||||||
uint64_t __attribute__((aligned(8))) mmx_redmask = 0xf8f8f8f8f8f8f8f8;
|
|
||||||
uint64_t __attribute__((aligned(8))) mmx_grnmask = 0xfcfcfcfcfcfcfcfc;
|
|
||||||
|
|
||||||
uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL;
|
|
||||||
uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL;
|
|
||||||
uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL;
|
|
||||||
|
|
||||||
// the volatile is required because gcc otherwise optimizes some writes away not knowing that these
|
|
||||||
// are read in the asm block
|
|
||||||
volatile uint64_t __attribute__((aligned(8))) b5Dither;
|
|
||||||
volatile uint64_t __attribute__((aligned(8))) g5Dither;
|
|
||||||
volatile uint64_t __attribute__((aligned(8))) g6Dither;
|
|
||||||
volatile uint64_t __attribute__((aligned(8))) r5Dither;
|
|
||||||
|
|
||||||
uint64_t __attribute__((aligned(8))) dither4[2]={
|
|
||||||
0x0103010301030103LL,
|
|
||||||
0x0200020002000200LL,};
|
|
||||||
|
|
||||||
uint64_t __attribute__((aligned(8))) dither8[2]={
|
|
||||||
0x0602060206020602LL,
|
|
||||||
0x0004000400040004LL,};
|
|
||||||
|
|
||||||
|
#ifdef HAVE_3DNOW
|
||||||
|
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
|
||||||
|
#define EMMS "femms"
|
||||||
|
#else
|
||||||
|
#define EMMS "emms"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_MMX2
|
||||||
|
#define MOVNTQ "movntq"
|
||||||
|
#define SFENCE "sfence"
|
||||||
|
#else
|
||||||
|
#define MOVNTQ "movq"
|
||||||
|
#define SFENCE "/nop"
|
||||||
|
#endif
|
||||||
|
|
||||||
#define YUV2RGB \
|
#define YUV2RGB \
|
||||||
/* Do the multiply part of the conversion for even and odd pixels,
|
/* Do the multiply part of the conversion for even and odd pixels,
|
||||||
@ -152,7 +121,7 @@ uint64_t __attribute__((aligned(8))) dither8[2]={
|
|||||||
"punpcklbw %%mm5, %%mm2;" /* G7 G6 G5 G4 G3 G2 G1 G0 */\
|
"punpcklbw %%mm5, %%mm2;" /* G7 G6 G5 G4 G3 G2 G1 G0 */\
|
||||||
|
|
||||||
|
|
||||||
static void yuv420_rgb16_mmx (uint8_t * image, uint8_t * py,
|
static inline void RENAME(yuv420_rgb16) (uint8_t * image, uint8_t * py,
|
||||||
uint8_t * pu, uint8_t * pv,
|
uint8_t * pu, uint8_t * pv,
|
||||||
int h_size, int v_size,
|
int h_size, int v_size,
|
||||||
int rgb_stride, int y_stride, int uv_stride)
|
int rgb_stride, int y_stride, int uv_stride)
|
||||||
@ -253,7 +222,7 @@ YUV2RGB
|
|||||||
__asm__ __volatile__ (EMMS);
|
__asm__ __volatile__ (EMMS);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void yuv420_rgb15_mmx (uint8_t * image, uint8_t * py,
|
static inline void RENAME(yuv420_rgb15) (uint8_t * image, uint8_t * py,
|
||||||
uint8_t * pu, uint8_t * pv,
|
uint8_t * pu, uint8_t * pv,
|
||||||
int h_size, int v_size,
|
int h_size, int v_size,
|
||||||
int rgb_stride, int y_stride, int uv_stride)
|
int rgb_stride, int y_stride, int uv_stride)
|
||||||
@ -350,7 +319,7 @@ YUV2RGB
|
|||||||
__asm__ __volatile__ (EMMS);
|
__asm__ __volatile__ (EMMS);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void yuv420_rgb24_mmx (uint8_t * image, uint8_t * py,
|
static inline void RENAME(yuv420_rgb24) (uint8_t * image, uint8_t * py,
|
||||||
uint8_t * pu, uint8_t * pv,
|
uint8_t * pu, uint8_t * pv,
|
||||||
int h_size, int v_size,
|
int h_size, int v_size,
|
||||||
int rgb_stride, int y_stride, int uv_stride)
|
int rgb_stride, int y_stride, int uv_stride)
|
||||||
@ -505,7 +474,7 @@ YUV2RGB
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void yuv420_argb32_mmx (uint8_t * image, uint8_t * py,
|
static inline void RENAME(yuv420_argb32) (uint8_t * image, uint8_t * py,
|
||||||
uint8_t * pu, uint8_t * pv,
|
uint8_t * pu, uint8_t * pv,
|
||||||
int h_size, int v_size,
|
int h_size, int v_size,
|
||||||
int rgb_stride, int y_stride, int uv_stride)
|
int rgb_stride, int y_stride, int uv_stride)
|
||||||
@ -599,12 +568,12 @@ YUV2RGB
|
|||||||
__asm__ __volatile__ (EMMS);
|
__asm__ __volatile__ (EMMS);
|
||||||
}
|
}
|
||||||
|
|
||||||
yuv2rgb_fun yuv2rgb_init_mmx (int bpp, int mode)
|
yuv2rgb_fun RENAME(yuv2rgb_init) (int bpp, int mode)
|
||||||
{
|
{
|
||||||
if (bpp == 15 && mode == MODE_RGB) return yuv420_rgb15_mmx;
|
if (bpp == 15 && mode == MODE_RGB) return RENAME(yuv420_rgb15);
|
||||||
if (bpp == 16 && mode == MODE_RGB) return yuv420_rgb16_mmx;
|
if (bpp == 16 && mode == MODE_RGB) return RENAME(yuv420_rgb16);
|
||||||
if (bpp == 24 && mode == MODE_RGB) return yuv420_rgb24_mmx;
|
if (bpp == 24 && mode == MODE_RGB) return RENAME(yuv420_rgb24);
|
||||||
if (bpp == 32 && mode == MODE_RGB) return yuv420_argb32_mmx;
|
if (bpp == 32 && mode == MODE_RGB) return RENAME(yuv420_argb32);
|
||||||
return NULL; // Fallback to C.
|
return NULL; // Fallback to C.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user