mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2025-01-24 13:04:39 +00:00
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
Originally committed as revision 1885 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
3db320ea0c
commit
669ac79cf5
@ -466,6 +466,14 @@ CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c,
|
||||
#else // 64 bit variant
|
||||
|
||||
#define PIXOP2(OPNAME, OP) \
|
||||
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
||||
int i;\
|
||||
for(i=0; i<h; i++){\
|
||||
OP(*((uint16_t*)(block )), LD16(pixels ));\
|
||||
pixels+=line_size;\
|
||||
block +=line_size;\
|
||||
}\
|
||||
}\
|
||||
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
||||
int i;\
|
||||
for(i=0; i<h; i++){\
|
||||
@ -526,6 +534,17 @@ static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, cons
|
||||
}\
|
||||
}\
|
||||
\
|
||||
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
|
||||
int src_stride1, int src_stride2, int h){\
|
||||
int i;\
|
||||
for(i=0; i<h; i++){\
|
||||
uint32_t a,b;\
|
||||
a= LD16(&src1[i*src_stride1 ]);\
|
||||
b= LD16(&src2[i*src_stride2 ]);\
|
||||
OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
|
||||
}\
|
||||
}\
|
||||
\
|
||||
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
|
||||
int src_stride1, int src_stride2, int h){\
|
||||
OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
|
||||
@ -589,6 +608,23 @@ static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint
|
||||
OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
||||
}\
|
||||
}\
|
||||
\
|
||||
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
||||
OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
|
||||
}\
|
||||
\
|
||||
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
||||
OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
|
||||
}\
|
||||
\
|
||||
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
||||
OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
|
||||
}\
|
||||
\
|
||||
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
||||
OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
|
||||
}\
|
||||
\
|
||||
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
|
||||
int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
|
||||
int i;\
|
||||
@ -635,6 +671,75 @@ static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *sr
|
||||
OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
|
||||
}\
|
||||
\
|
||||
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
||||
{\
|
||||
int i, a0, b0, a1, b1;\
|
||||
a0= pixels[0];\
|
||||
b0= pixels[1] + 2;\
|
||||
a0 += b0;\
|
||||
b0 += pixels[2];\
|
||||
\
|
||||
pixels+=line_size;\
|
||||
for(i=0; i<h; i+=2){\
|
||||
a1= pixels[0];\
|
||||
b1= pixels[1];\
|
||||
a1 += b1;\
|
||||
b1 += pixels[2];\
|
||||
\
|
||||
block[0]= (a1+a0)>>2; /* FIXME non put */\
|
||||
block[1]= (b1+b0)>>2;\
|
||||
\
|
||||
pixels+=line_size;\
|
||||
block +=line_size;\
|
||||
\
|
||||
a0= pixels[0];\
|
||||
b0= pixels[1] + 2;\
|
||||
a0 += b0;\
|
||||
b0 += pixels[2];\
|
||||
\
|
||||
block[0]= (a1+a0)>>2;\
|
||||
block[1]= (b1+b0)>>2;\
|
||||
pixels+=line_size;\
|
||||
block +=line_size;\
|
||||
}\
|
||||
}\
|
||||
\
|
||||
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
||||
{\
|
||||
int i;\
|
||||
const uint32_t a= LD32(pixels );\
|
||||
const uint32_t b= LD32(pixels+1);\
|
||||
uint32_t l0= (a&0x03030303UL)\
|
||||
+ (b&0x03030303UL)\
|
||||
+ 0x02020202UL;\
|
||||
uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
|
||||
+ ((b&0xFCFCFCFCUL)>>2);\
|
||||
uint32_t l1,h1;\
|
||||
\
|
||||
pixels+=line_size;\
|
||||
for(i=0; i<h; i+=2){\
|
||||
uint32_t a= LD32(pixels );\
|
||||
uint32_t b= LD32(pixels+1);\
|
||||
l1= (a&0x03030303UL)\
|
||||
+ (b&0x03030303UL);\
|
||||
h1= ((a&0xFCFCFCFCUL)>>2)\
|
||||
+ ((b&0xFCFCFCFCUL)>>2);\
|
||||
OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
||||
pixels+=line_size;\
|
||||
block +=line_size;\
|
||||
a= LD32(pixels );\
|
||||
b= LD32(pixels+1);\
|
||||
l0= (a&0x03030303UL)\
|
||||
+ (b&0x03030303UL)\
|
||||
+ 0x02020202UL;\
|
||||
h0= ((a&0xFCFCFCFCUL)>>2)\
|
||||
+ ((b&0xFCFCFCFCUL)>>2);\
|
||||
OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
||||
pixels+=line_size;\
|
||||
block +=line_size;\
|
||||
}\
|
||||
}\
|
||||
\
|
||||
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
||||
{\
|
||||
int j;\
|
||||
@ -819,6 +924,125 @@ static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
|
||||
oy += dyy;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Thirdpel position (0,0): no interpolation, just a block copy.
 * The copy is delegated to the fixed-width put_pixels routine matching
 * width; any width other than 2, 4, 8 or 16 does nothing.
 * @param dst    destination block
 * @param src    source block
 * @param stride line size in bytes, shared by dst and src
 * @param width  block width in pixels
 * @param height block height in lines
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
|
||||
|
||||
/**
 * Thirdpel position (1,0): horizontal interpolation weighted 2:1 towards
 * the left pixel. Each output is (2*left + right + 1)/3, with the division
 * carried out as a multiply by 683 followed by a right shift of 11.
 * Reads one pixel beyond width on every line.
 * @param dst    destination block
 * @param src    source block
 * @param stride line size in bytes, shared by dst and src
 * @param width  block width in pixels
 * @param height block height in lines
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = height; row > 0; row--) {
        int col = 0;

        while (col < width) {
            const int sum = 2*src[col] + src[col+1] + 1;

            dst[col] = (683*sum) >> 11;
            col++;
        }
        dst += stride;
        src += stride;
    }
}
|
||||
|
||||
/**
 * Thirdpel position (2,0): horizontal interpolation weighted 1:2 towards
 * the right pixel. Each output is (left + 2*right + 1)/3, with the division
 * carried out as a multiply by 683 followed by a right shift of 11.
 * Reads one pixel beyond width on every line.
 * @param dst    destination block
 * @param src    source block
 * @param stride line size in bytes, shared by dst and src
 * @param width  block width in pixels
 * @param height block height in lines
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = height; row > 0; row--) {
        int col = 0;

        while (col < width) {
            const int sum = src[col] + 2*src[col+1] + 1;

            dst[col] = (683*sum) >> 11;
            col++;
        }
        dst += stride;
        src += stride;
    }
}
|
||||
|
||||
/**
 * Thirdpel position (0,1): vertical interpolation weighted 2:1 towards
 * the upper pixel. Each output is (2*upper + lower + 1)/3, with the division
 * carried out as a multiply by 683 followed by a right shift of 11.
 * Reads one line beyond height.
 * @param dst    destination block
 * @param src    source block
 * @param stride line size in bytes, shared by dst and src
 * @param width  block width in pixels
 * @param height block height in lines
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = height; row > 0; row--) {
        int col = 0;

        while (col < width) {
            const int sum = 2*src[col] + src[col+stride] + 1;

            dst[col] = (683*sum) >> 11;
            col++;
        }
        dst += stride;
        src += stride;
    }
}
|
||||
|
||||
/**
 * Thirdpel position (1,1): two-dimensional interpolation over the 2x2
 * neighbourhood with weights 4,3 (upper line) and 3,2 (lower line).
 * The weighted sum plus 6 is divided by 12 via a multiply by 2731 and a
 * right shift of 15. Reads one pixel beyond width and one line beyond height.
 * @param dst    destination block
 * @param src    source block
 * @param stride line size in bytes, shared by dst and src
 * @param width  block width in pixels
 * @param height block height in lines
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = height; row > 0; row--) {
        int col = 0;

        while (col < width) {
            const int upper = 4*src[col]        + 3*src[col+1];
            const int lower = 3*src[col+stride] + 2*src[col+stride+1];

            dst[col] = (2731*(upper + lower + 6)) >> 15;
            col++;
        }
        dst += stride;
        src += stride;
    }
}
|
||||
|
||||
/**
 * Thirdpel position x=1, y=2: two-dimensional interpolation over the 2x2
 * neighbourhood with weights 3,2 (upper line) and 4,3 (lower line), i.e.
 * the lower-left pixel dominates.
 * The weighted sum plus 6 is divided by 12 via a multiply by 2731 and a
 * right shift of 15. Reads one pixel beyond width and one line beyond height.
 *
 * This function is installed at put_tpel_pixels_tab[1 + 4*2]; the weights
 * must therefore be the ones for one third horizontally and two thirds
 * vertically (previously the x=2, y=1 weights 3,4,2,3 were used here).
 * @param dst    destination block
 * @param src    source block
 * @param stride line size in bytes, shared by dst and src
 * @param width  block width in pixels
 * @param height block height in lines
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
|
||||
|
||||
/**
 * Thirdpel position (0,2): vertical interpolation weighted 1:2 towards
 * the lower pixel. Each output is (upper + 2*lower + 1)/3, with the division
 * carried out as a multiply by 683 followed by a right shift of 11.
 * Reads one line beyond height.
 * @param dst    destination block
 * @param src    source block
 * @param stride line size in bytes, shared by dst and src
 * @param width  block width in pixels
 * @param height block height in lines
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = height; row > 0; row--) {
        int col = 0;

        while (col < width) {
            const int sum = src[col] + 2*src[col+stride] + 1;

            dst[col] = (683*sum) >> 11;
            col++;
        }
        dst += stride;
        src += stride;
    }
}
|
||||
|
||||
/**
 * Thirdpel position x=2, y=1: two-dimensional interpolation over the 2x2
 * neighbourhood with weights 3,4 (upper line) and 2,3 (lower line), i.e.
 * the upper-right pixel dominates.
 * The weighted sum plus 6 is divided by 12 via a multiply by 2731 and a
 * right shift of 15. Reads one pixel beyond width and one line beyond height.
 *
 * This function is installed at put_tpel_pixels_tab[2 + 4*1]; the weights
 * must therefore be the ones for two thirds horizontally and one third
 * vertically (previously the x=1, y=2 weights 3,2,4,3 were used here).
 * @param dst    destination block
 * @param src    source block
 * @param stride line size in bytes, shared by dst and src
 * @param width  block width in pixels
 * @param height block height in lines
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
|
||||
|
||||
/**
 * Thirdpel position (2,2): two-dimensional interpolation over the 2x2
 * neighbourhood with weights 2,3 (upper line) and 3,4 (lower line).
 * The weighted sum plus 6 is divided by 12 via a multiply by 2731 and a
 * right shift of 15. Reads one pixel beyond width and one line beyond height.
 * @param dst    destination block
 * @param src    source block
 * @param stride line size in bytes, shared by dst and src
 * @param width  block width in pixels
 * @param height block height in lines
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = height; row > 0; row--) {
        int col = 0;

        while (col < width) {
            const int upper = 2*src[col]        + 3*src[col+1];
            const int lower = 3*src[col+stride] + 4*src[col+stride+1];

            dst[col] = (2731*(upper + lower + 6)) >> 15;
            col++;
        }
        dst += stride;
        src += stride;
    }
}
|
||||
#if 0
|
||||
/* Generates one fixed-width wrapper per thirdpel position; each wrapper has
 * the (dst, src, stride, height) signature and forwards to the generic
 * put_tpel_pixels_mcXY_c routine with the width baked in.
 * The forwarding statements are plain calls (a leading "void" in front of
 * them would not compile once this block is enabled). */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
|
||||
#endif
|
||||
|
||||
#define H264_CHROMA_MC(OPNAME, OP)\
|
||||
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
|
||||
const int A=(8-x)*(8-y);\
|
||||
@ -2561,6 +2785,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
||||
dspfunc(put_no_rnd, 0, 16);
|
||||
dspfunc(put, 1, 8);
|
||||
dspfunc(put_no_rnd, 1, 8);
|
||||
dspfunc(put, 2, 4);
|
||||
dspfunc(put, 3, 2);
|
||||
|
||||
dspfunc(avg, 0, 16);
|
||||
dspfunc(avg_no_rnd, 0, 16);
|
||||
@ -2568,6 +2794,16 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
||||
dspfunc(avg_no_rnd, 1, 8);
|
||||
#undef dspfunc
|
||||
|
||||
c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
|
||||
c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
|
||||
c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
|
||||
c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
|
||||
c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
|
||||
c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
|
||||
c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
|
||||
c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
|
||||
c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
|
||||
|
||||
#define dspfunc(PFX, IDX, NUM) \
|
||||
c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
|
||||
c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
|
||||
@ -2621,7 +2857,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
||||
c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
|
||||
c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
|
||||
c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
|
||||
|
||||
|
||||
c->hadamard8_diff[0]= hadamard8_diff16_c;
|
||||
c->hadamard8_diff[1]= hadamard8_diff_c;
|
||||
c->hadamard8_abs = hadamard8_abs_c;
|
||||
|
@ -77,6 +77,7 @@ void clear_blocks_c(DCTELEM *blocks);
|
||||
/* add and put pixel (decoding) */
|
||||
// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
|
||||
typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
|
||||
typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
|
||||
typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
|
||||
typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
|
||||
|
||||
@ -146,18 +147,18 @@ typedef struct DSPContext {
|
||||
me_cmp_func me_sub_cmp[11];
|
||||
me_cmp_func mb_cmp[11];
|
||||
|
||||
/* maybe create an array for 16/8 functions */
|
||||
/* maybe create an array for 16/8/4/2 functions */
|
||||
/**
|
||||
* Halfpel motion compensation with rounding (a+b+1)>>1.
|
||||
* this is an array[2][4] of motion compensation funcions for 2
|
||||
* horizontal blocksizes (8,16) and the 4 halfpel positions<br>
|
||||
* this is an array[4][4] of motion compensation functions for 4
|
||||
* horizontal blocksizes (2,4,8,16) and the 4 halfpel positions<br>
|
||||
* *pixels_tab[ 0->16xH 1->8xH 2->4xH 3->2xH ][ xhalfpel + 2*yhalfpel ]
|
||||
* @param block destination where the result is stored
|
||||
* @param pixels source
|
||||
* @param line_size number of bytes in a horizontal line of block
|
||||
* @param h height
|
||||
*/
|
||||
op_pixels_func put_pixels_tab[2][4];
|
||||
op_pixels_func put_pixels_tab[4][4];
|
||||
|
||||
/**
|
||||
* Halfpel motion compensation with rounding (a+b+1)>>1.
|
||||
@ -194,6 +195,18 @@ typedef struct DSPContext {
|
||||
* @param h height
|
||||
*/
|
||||
op_pixels_func avg_no_rnd_pixels_tab[2][4];
|
||||
|
||||
/**
|
||||
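* Thirdpel motion compensation; each output pixel is a rounded weighted average of the neighbouring source pixels, e.g. (2*a+b+1)/3 for a purely horizontal or vertical offset of one third.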
|
||||
* this is an array[11] of motion compensation functions for the 9 thirdpel positions (indices 3 and 7 are unused)<br>
|
||||
* *pixels_tab[ xthirdpel + 4*ythirdpel ]
|
||||
* @param block destination where the result is stored
|
||||
* @param pixels source
|
||||
* @param line_size number of bytes in a horizontal line of block
|
||||
* @param w width
* @param h height
|
||||
*/
|
||||
tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
|
||||
|
||||
qpel_mc_func put_qpel_pixels_tab[2][16];
|
||||
qpel_mc_func avg_qpel_pixels_tab[2][16];
|
||||
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
|
||||
@ -380,7 +393,9 @@ void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
|
||||
|
||||
/* Single-member wrappers declared with the GCC packed attribute; the LDnn
 * macros below read through them so that the load does not rely on the
 * address being naturally aligned for the member type. */
struct unaligned_64 { uint64_t l; } __attribute__((packed));
|
||||
struct unaligned_32 { uint32_t l; } __attribute__((packed));
|
||||
struct unaligned_16 { uint16_t l; } __attribute__((packed));
|
||||
|
||||
/* LDnn(a): load an nn-bit value from address a via the matching packed wrapper. */
#define LD16(a) (((const struct unaligned_16 *) (a))->l)
|
||||
#define LD32(a) (((const struct unaligned_32 *) (a))->l)
|
||||
#define LD64(a) (((const struct unaligned_64 *) (a))->l)
|
||||
|
||||
@ -388,6 +403,7 @@ struct unaligned_32 { uint32_t l; } __attribute__((packed));
|
||||
|
||||
#else /* __GNUC__ */
|
||||
|
||||
#define LD16(a) (*((uint16_t*)(a)))
|
||||
#define LD32(a) (*((uint32_t*)(a)))
|
||||
#define LD64(a) (*((uint64_t*)(a)))
|
||||
|
||||
|
@ -262,125 +262,11 @@ static inline int svq3_decode_block (GetBitContext *gb, DCTELEM *block,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Copy or interpolate a width x height block from src to dst.
 * The case labels are written as 6*ypos + xpos, where a position of 2 or 4
 * selects the one-third / two-thirds filters and 3 selects the half-pel
 * average. A dxy value without a case leaves dst untouched.
 * @param s      not referenced by this function
 * @param src    source block (the interpolating cases read one pixel to the
 *               right and/or one line below the block)
 * @param dst    destination block
 * @param stride line size in bytes, shared by src and dst
 * @param dxy    sub-pel position selector, see above
 * @param width  block width in pixels
 * @param height block height in lines
 */
static void sixpel_mc_put (MpegEncContext *s,
                           uint8_t *src, uint8_t *dst, int stride,
                           int dxy, int width, int height) {
  int row, col;

/* Evaluate EXPR (written in terms of src[col...]) for every pixel of the
 * block, advancing both pointers by one line after each row. */
#define SIXPEL_FILL(EXPR)                     \
  do {                                        \
    for (row = 0; row < height; row++) {      \
      for (col = 0; col < width; col++) {     \
        dst[col] = (EXPR);                    \
      }                                       \
      src += stride;                          \
      dst += stride;                          \
    }                                         \
  } while (0)

  switch (dxy) {
  case 6*0+0:
    /* integer position: straight line-by-line copy */
    for (row = height; row > 0; row--) {
      memcpy (dst, src, width);
      src += stride;
      dst += stride;
    }
    break;
  case 6*0+2:
    SIXPEL_FILL((683*(2*src[col] + src[col+1] + 1)) >> 11);
    break;
  case 6*0+3:
    SIXPEL_FILL((src[col] + src[col+1] + 1) >> 1);
    break;
  case 6*0+4:
    SIXPEL_FILL((683*(src[col] + 2*src[col+1] + 1)) >> 11);
    break;
  case 6*2+0:
    SIXPEL_FILL((683*(2*src[col] + src[col+stride] + 1)) >> 11);
    break;
  case 6*2+2:
    SIXPEL_FILL((2731*(4*src[col] + 3*src[col+1] + 3*src[col+stride] + 2*src[col+stride+1] + 6)) >> 15);
    break;
  case 6*2+4:
    SIXPEL_FILL((2731*(3*src[col] + 4*src[col+1] + 2*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15);
    break;
  case 6*3+0:
    SIXPEL_FILL((src[col] + src[col+stride]+1) >> 1);
    break;
  case 6*3+3:
    SIXPEL_FILL((src[col] + src[col+1] + src[col+stride] + src[col+stride+1] + 2) >> 2);
    break;
  case 6*4+0:
    SIXPEL_FILL((683*(src[col] + 2*src[col+stride] + 1)) >> 11);
    break;
  case 6*4+2:
    SIXPEL_FILL((2731*(3*src[col] + 2*src[col+1] + 4*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15);
    break;
  case 6*4+4:
    SIXPEL_FILL((2731*(2*src[col] + 3*src[col+1] + 3*src[col+stride] + 4*src[col+stride+1] + 6)) >> 15);
    break;
  }

#undef SIXPEL_FILL
}
|
||||
|
||||
static inline void svq3_mc_dir_part (MpegEncContext *s, int x, int y,
|
||||
int width, int height, int mx, int my, int dxy) {
|
||||
int width, int height, int mx, int my, int dxy, int thirdpel) {
|
||||
uint8_t *src, *dest;
|
||||
int i, emu = 0;
|
||||
int blocksize= 2 - (width>>3); //16->0, 8->1, 4->2
|
||||
|
||||
mx += x;
|
||||
my += y;
|
||||
@ -405,13 +291,17 @@ static inline void svq3_mc_dir_part (MpegEncContext *s, int x, int y,
|
||||
mx, my, s->width, s->height);
|
||||
src = s->edge_emu_buffer;
|
||||
}
|
||||
sixpel_mc_put (s, src, dest, s->linesize, dxy, width, height);
|
||||
if(thirdpel)
|
||||
s->dsp.put_tpel_pixels_tab[dxy](dest, src, s->linesize, width, height);
|
||||
else
|
||||
s->dsp.put_pixels_tab[blocksize][dxy](dest, src, s->linesize, height);
|
||||
|
||||
if (!(s->flags & CODEC_FLAG_GRAY)) {
|
||||
mx = (mx + (mx < (int) x)) >> 1;
|
||||
my = (my + (my < (int) y)) >> 1;
|
||||
width = (width >> 1);
|
||||
height = (height >> 1);
|
||||
blocksize++;
|
||||
|
||||
for (i=1; i < 3; i++) {
|
||||
dest = s->current_picture.data[i] + (x >> 1) + (y >> 1)*s->uvlinesize;
|
||||
@ -422,7 +312,10 @@ static inline void svq3_mc_dir_part (MpegEncContext *s, int x, int y,
|
||||
mx, my, (s->width >> 1), (s->height >> 1));
|
||||
src = s->edge_emu_buffer;
|
||||
}
|
||||
sixpel_mc_put (s, src, dest, s->uvlinesize, dxy, width, height);
|
||||
if(thirdpel)
|
||||
s->dsp.put_tpel_pixels_tab[dxy](dest, src, s->uvlinesize, width, height);
|
||||
else
|
||||
s->dsp.put_pixels_tab[blocksize][dxy](dest, src, s->uvlinesize, height);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -441,7 +334,7 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
|
||||
h->topright_samples_available = 0xFFFF;
|
||||
|
||||
if (mb_type == 0) { /* SKIP */
|
||||
svq3_mc_dir_part (s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0);
|
||||
svq3_mc_dir_part (s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0, 0);
|
||||
|
||||
cbp = 0;
|
||||
mb_type = MB_TYPE_SKIP;
|
||||
@ -521,17 +414,17 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
|
||||
my = ((my + 1)>>1) + dy;
|
||||
fx= ((unsigned)(mx + 0x3000))/3 - 0x1000;
|
||||
fy= ((unsigned)(my + 0x3000))/3 - 0x1000;
|
||||
dxy= 2*(mx - 3*fx) + 2*6*(my - 3*fy);
|
||||
dxy= (mx - 3*fx) + 4*(my - 3*fy);
|
||||
|
||||
svq3_mc_dir_part (s, x, y, part_width, part_height, fx, fy, dxy);
|
||||
svq3_mc_dir_part (s, x, y, part_width, part_height, fx, fy, dxy, 1);
|
||||
mx += mx;
|
||||
my += my;
|
||||
} else if (mode == HALFPEL_MODE) {
|
||||
mx = ((unsigned)(mx + 1 + 0x3000))/3 + dx - 0x1000;
|
||||
my = ((unsigned)(my + 1 + 0x3000))/3 + dy - 0x1000;
|
||||
dxy= 3*(mx&1) + 6*3*(my&1);
|
||||
dxy= (mx&1) + 2*(my&1);
|
||||
|
||||
svq3_mc_dir_part (s, x, y, part_width, part_height, mx>>1, my>>1, dxy);
|
||||
svq3_mc_dir_part (s, x, y, part_width, part_height, mx>>1, my>>1, dxy, 0);
|
||||
mx *= 3;
|
||||
my *= 3;
|
||||
} else {
|
||||
@ -539,7 +432,7 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
|
||||
mx = ((unsigned)(mx + 3 + 0x6000))/6 + dx - 0x1000;
|
||||
my = ((unsigned)(my + 3 + 0x6000))/6 + dy - 0x1000;
|
||||
|
||||
svq3_mc_dir_part (s, x, y, part_width, part_height, mx, my, 0);
|
||||
svq3_mc_dir_part (s, x, y, part_width, part_height, mx, my, 0, 0);
|
||||
mx *= 6;
|
||||
my *= 6;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user