diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 8da1201cbc..bc36826ea2 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -352,10 +352,11 @@ typedef struct SwsContext { #define U_TEMP "11*8+4*4*256*2+24" #define V_TEMP "11*8+4*4*256*2+32" #define Y_TEMP "11*8+4*4*256*2+40" -#define UV_OFF_PX "11*8+4*4*256*2+48" -#define UV_OFF_BYTE "11*8+4*4*256*2+56" -#define DITHER16 "11*8+4*4*256*2+64" -#define DITHER32 "11*8+4*4*256*2+80" +#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48" +#define UV_OFF_PX "11*8+4*4*256*3+48" +#define UV_OFF_BYTE "11*8+4*4*256*3+56" +#define DITHER16 "11*8+4*4*256*3+64" +#define DITHER32 "11*8+4*4*256*3+80" DECLARE_ALIGNED(8, uint64_t, redDither); DECLARE_ALIGNED(8, uint64_t, greenDither); @@ -377,6 +378,7 @@ typedef struct SwsContext { DECLARE_ALIGNED(8, uint64_t, u_temp); DECLARE_ALIGNED(8, uint64_t, v_temp); DECLARE_ALIGNED(8, uint64_t, y_temp); + int32_t alpMmxFilter[4 * MAX_FILTER_SIZE]; // alignment of these values is not necessary, but merely here // to maintain the same offset across x8632 and x86-64. Once we // use proper offset macros in the asm, they can be removed. diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c index c112cb8a6d..764472e95e 100644 --- a/libswscale/x86/swscale_mmx.c +++ b/libswscale/x86/swscale_mmx.c @@ -90,6 +90,7 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI const int flags= c->flags; int16_t **lumPixBuf= c->lumPixBuf; int16_t **chrUPixBuf= c->chrUPixBuf; + int16_t **alpPixBuf= c->alpPixBuf; const int vLumBufSize= c->vLumBufSize; const int vChrBufSize= c->vChrBufSize; int16_t *vLumFilterPos= c->vLumFilterPos; @@ -98,6 +99,7 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI int16_t *vChrFilter= c->vChrFilter; int32_t *lumMmxFilter= c->lumMmxFilter; int32_t *chrMmxFilter= c->chrMmxFilter; + int32_t av_unused *alpMmxFilter= c->alpMmxFilter; const int vLumFilterSize= c->vLumFilterSize; const int vChrFilterSize= c->vChrFilterSize; const int chrDstY= dstY>>c->chrDstVSubSample; @@ -113,6 +115,7 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI if (dstY < dstH - 2) { const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; int i; if (flags & SWS_ACCURATE_RND) { int s= APCK_SIZE / 8; @@ -122,6 +125,12 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI lumMmxFilter[s*i+APCK_COEF/4 ]= lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ] + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); + if (CONFIG_SWSCALE_ALPHA && alpPixBuf) { + *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ]; + *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)]; + alpMmxFilter[s*i+APCK_COEF/4 ]= + alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ]; + } } for (i=0; ialpPixBuf) { YSCALEYUV2PACKEDX YSCALEYUV2RGBX - YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7) + YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7) "psraw $3, %%mm1 \n\t" "psraw $3, %%mm7 \n\t" "packuswb %%mm7, %%mm1 \n\t"