mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2024-11-24 20:19:55 +00:00
Fix accurate rounding mode on x86_64.
Fixes issue222. Originally committed as revision 27545 to svn://svn.mplayerhq.hu/mplayer/trunk/libswscale
This commit is contained in:
parent
3943bd0901
commit
1625216eaa
@ -1293,7 +1293,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
|
||||
filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
|
||||
assert(filterSize > 0);
|
||||
filter= av_malloc(filterSize*dstW*sizeof(double));
|
||||
if (filterSize >= MAX_FILTER_SIZE || !filter)
|
||||
if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
|
||||
goto error;
|
||||
*outFilterSize= filterSize;
|
||||
|
||||
|
@ -29,6 +29,8 @@
|
||||
|
||||
#include "libavutil/avutil.h"
|
||||
|
||||
#define STR(s) AV_TOSTRING(s) // short alias: AV_STRINGIFY is too long
|
||||
|
||||
/* Limit on the filter size: initFilter() bails out to its error path when
 * filterSize reaches MAX_FILTER_SIZE*16/APCK_SIZE with SWS_ACCURATE_RND set,
 * or MAX_FILTER_SIZE itself otherwise. */
#define MAX_FILTER_SIZE 256
|
||||
|
||||
#define VOFW 2048
|
||||
@ -40,6 +42,16 @@
|
||||
#define ALT32_CORR 1
|
||||
#endif
|
||||
|
||||
/*
 * Byte layout of one packed entry of the MMX vertical filter tables used when
 * SWS_ACCURATE_RND is set. Each entry describes two filter taps:
 *   offset 0          first source-line pointer
 *   offset APCK_PTR2  second source-line pointer
 *   offset APCK_COEF  the two 16-bit coefficients packed into one 32-bit
 *                     word, stored twice (read by the asm with one movq)
 *   APCK_SIZE         total entry size; the asm advances by this amount and
 *                     reads the next entry's first pointer at this offset
 * The offsets differ because the entry stores raw pointers (written through
 * void** in swScale), which are presumably 8 bytes wide on x86_64 and
 * 4 bytes wide on the other MMX targets.
 */
#ifdef ARCH_X86_64
|
||||
# define APCK_PTR2 8
|
||||
# define APCK_COEF 16
|
||||
# define APCK_SIZE 24
|
||||
#else
|
||||
# define APCK_PTR2 4
|
||||
# define APCK_COEF 8
|
||||
# define APCK_SIZE 16
|
||||
#endif
|
||||
|
||||
typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
|
||||
int srcSliceH, uint8_t* dst[], int dstStride[]);
|
||||
|
||||
|
@ -119,19 +119,19 @@
|
||||
"1: \n\t"\
|
||||
"movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\
|
||||
"movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
|
||||
"mov 4(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"movq " x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\
|
||||
"movq %%mm0, %%mm3 \n\t"\
|
||||
"punpcklwd %%mm1, %%mm0 \n\t"\
|
||||
"punpckhwd %%mm1, %%mm3 \n\t"\
|
||||
"movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
|
||||
"movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
|
||||
"pmaddwd %%mm1, %%mm0 \n\t"\
|
||||
"pmaddwd %%mm1, %%mm3 \n\t"\
|
||||
"paddd %%mm0, %%mm4 \n\t"\
|
||||
"paddd %%mm3, %%mm5 \n\t"\
|
||||
"movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\
|
||||
"mov 16(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"add $16, %%"REG_d" \n\t"\
|
||||
"mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
|
||||
"test %%"REG_S", %%"REG_S" \n\t"\
|
||||
"movq %%mm2, %%mm0 \n\t"\
|
||||
"punpcklwd %%mm3, %%mm2 \n\t"\
|
||||
@ -271,19 +271,19 @@
|
||||
"2: \n\t"\
|
||||
"movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
|
||||
"movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
|
||||
"mov 4(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
|
||||
"movq %%mm0, %%mm3 \n\t"\
|
||||
"punpcklwd %%mm1, %%mm0 \n\t"\
|
||||
"punpckhwd %%mm1, %%mm3 \n\t"\
|
||||
"movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
|
||||
"movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\
|
||||
"pmaddwd %%mm1, %%mm0 \n\t"\
|
||||
"pmaddwd %%mm1, %%mm3 \n\t"\
|
||||
"paddd %%mm0, %%mm4 \n\t"\
|
||||
"paddd %%mm3, %%mm5 \n\t"\
|
||||
"movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
|
||||
"mov 16(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"add $16, %%"REG_d" \n\t"\
|
||||
"mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
|
||||
"test %%"REG_S", %%"REG_S" \n\t"\
|
||||
"movq %%mm2, %%mm0 \n\t"\
|
||||
"punpcklwd %%mm3, %%mm2 \n\t"\
|
||||
@ -315,19 +315,19 @@
|
||||
"2: \n\t"\
|
||||
"movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
|
||||
"movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
|
||||
"mov 4(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
|
||||
"movq %%mm0, %%mm3 \n\t"\
|
||||
"punpcklwd %%mm4, %%mm0 \n\t"\
|
||||
"punpckhwd %%mm4, %%mm3 \n\t"\
|
||||
"movq 8(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
|
||||
"movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
|
||||
"pmaddwd %%mm4, %%mm0 \n\t"\
|
||||
"pmaddwd %%mm4, %%mm3 \n\t"\
|
||||
"paddd %%mm0, %%mm1 \n\t"\
|
||||
"paddd %%mm3, %%mm5 \n\t"\
|
||||
"movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
|
||||
"mov 16(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"add $16, %%"REG_d" \n\t"\
|
||||
"mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
|
||||
"test %%"REG_S", %%"REG_S" \n\t"\
|
||||
"movq %%mm2, %%mm0 \n\t"\
|
||||
"punpcklwd %%mm3, %%mm2 \n\t"\
|
||||
@ -3180,18 +3180,19 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
|
||||
#ifdef HAVE_MMX
|
||||
int i;
|
||||
if (flags & SWS_ACCURATE_RND){
|
||||
int s= APCK_SIZE / 8;
|
||||
for (i=0; i<vLumFilterSize; i+=2){
|
||||
lumMmxFilter[2*i+0]= (int32_t)lumSrcPtr[i ];
|
||||
lumMmxFilter[2*i+1]= (int32_t)lumSrcPtr[i+(vLumFilterSize>1)];
|
||||
lumMmxFilter[2*i+2]=
|
||||
lumMmxFilter[2*i+3]= vLumFilter[dstY*vLumFilterSize + i ]
|
||||
*(void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
|
||||
*(void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
|
||||
lumMmxFilter[s*i+APCK_COEF/4 ]=
|
||||
lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
|
||||
+ (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
|
||||
}
|
||||
for (i=0; i<vChrFilterSize; i+=2){
|
||||
chrMmxFilter[2*i+0]= (int32_t)chrSrcPtr[i ];
|
||||
chrMmxFilter[2*i+1]= (int32_t)chrSrcPtr[i+(vChrFilterSize>1)];
|
||||
chrMmxFilter[2*i+2]=
|
||||
chrMmxFilter[2*i+3]= vChrFilter[chrDstY*vChrFilterSize + i ]
|
||||
*(void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ];
|
||||
*(void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)];
|
||||
chrMmxFilter[s*i+APCK_COEF/4 ]=
|
||||
chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
|
||||
+ (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
|
||||
}
|
||||
}else{
|
||||
|
Loading…
Reference in New Issue
Block a user