Merge remote-tracking branch 'qatar/master'

* qatar/master: (22 commits)
  H.264: fix filter_mb_fast with 4:4:4 + 8x8dct
  alsa: limit buffer_size to 32768 frames.
  alsa: fallback to buffer_size/4 for period_size.
  doc: replace @pxref by @ref where appropriate
  mpeg1video: don't abort if thread_count is too high.
  segafilm: add support for videos with cri adx adpcm
  gxf: Fix 25 fps DV material in GXF being misdetected as 50 fps
  libxvid: Add const qualifier to silence compiler warning.
  H.264: improve qp_thresh check
  H.264: use fill_rectangle in CABAC decoding
  H.264: Remove redundant hl_motion_16/8 code
  H.264: merge fill_rectangle into P-SKIP MV prediction, to match B-SKIP
  H.264: faster P-SKIP decoding
  H.264: av_always_inline some more functions
  H.264: Add x86 assembly for 10-bit H.264 predict functions
  swscale: rename uv_off/uv_off2 to uv_off_px/byte.
  swscale: implement error dithering in planarCopyWrapper.
  swscale: error dithering for 16/9/10-bit to 8-bit.
  swscale: fix overflow in 16-bit vertical scaling.
  swscale: fix crash in 8-bpc bilinear output without alpha.
  ...

Conflicts:
	doc/developer.texi
	libavdevice/alsa-audio.h
	libavformat/gxf.c
	libswscale/swscale.c
	libswscale/swscale_internal.h
	libswscale/swscale_unscaled.c
	libswscale/x86/swscale_template.c
	tests/ref/lavfi/pixdesc
	tests/ref/lavfi/pixfmts_copy
	tests/ref/lavfi/pixfmts_crop
	tests/ref/lavfi/pixfmts_hflip
	tests/ref/lavfi/pixfmts_null
	tests/ref/lavfi/pixfmts_scale
	tests/ref/lavfi/pixfmts_vflip

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2011-07-10 04:28:50 +02:00
commit 2f56a97f24
19 changed files with 1191 additions and 108 deletions

View File

@ -244,7 +244,8 @@ Note, these rules are mostly borrowed from the MPlayer project.
@section Submitting patches
First, read the (@pxref{Coding Rules}) above if you did not yet.
First, read the @ref{Coding Rules} above if you did not yet, in particular
the rules regarding patch submission.
When you submit your patch, please use @code{git format-patch} or
@code{git send-email}. We cannot read other diffs :-)
@ -259,8 +260,8 @@ for us and greatly increases your chances of getting your patch applied.
Use the patcheck tool of FFmpeg to check your patch.
The tool is located in the tools directory.
Run the regression tests before submitting a patch so that you can
verify that there are no big problems.
Run the @ref{Regression Tests} before submitting a patch in order to verify
it does not cause unexpected problems.
Patches should be posted as base64 encoded attachments (or any other
encoding which ensures that the patch will not be trashed during

View File

@ -1013,8 +1013,7 @@ Erode an image by using a specific structuring element.
This filter corresponds to the libopencv function @code{cvErode}.
The filter accepts the parameters: @var{struct_el}:@var{nb_iterations},
with the same meaning and use of those of the dilate filter
(@pxref{dilate}).
with the same syntax and semantics as the @ref{dilate} filter.
@subsection smooth
@ -1432,7 +1431,7 @@ setdar=16:9
setdar=1.77777
@end example
See also the "setsar" filter documentation (@pxref{setsar}).
See also the @ref{setsar} filter documentation.
@section setpts
@ -1978,8 +1977,7 @@ form @var{width}x@var{height} or a frame size abbreviation.
the form @var{num}/@var{den} or a frame rate abbreviation.
@var{src_name} is the name to the frei0r source to load. For more
information regarding frei0r and how to set the parameters read the
section "frei0r" (@pxref{frei0r}) in the description of the video
filters.
section @ref{frei0r} in the description of the video filters.
Some examples follow:
@example

View File

@ -51,7 +51,7 @@ and the input video converted to MPEG-2 video, use the command:
ffmpeg -i INPUT -acodec pcm_u8 -vcodec mpeg2video -f crc -
@end example
See also the @code{framecrc} muxer (@pxref{framecrc}).
See also the @ref{framecrc} muxer.
@anchor{framecrc}
@section framecrc
@ -88,7 +88,7 @@ MPEG-2 video, use the command:
ffmpeg -i INPUT -acodec pcm_u8 -vcodec mpeg2video -f framecrc -
@end example
See also the @code{crc} muxer (@pxref{crc}).
See also the @ref{crc} muxer.
@section image2

View File

@ -352,6 +352,7 @@ sub postprocess
s/\(?\@xref\{(?:[^\}]*)\}(?:[^.<]|(?:<[^<>]*>))*\.\)?//g;
s/\s+\(\@pxref\{(?:[^\}]*)\}\)//g;
s/;\s+\@pxref\{(?:[^\}]*)\}//g;
s/\@ref\{([^\}]*)\}/$1/g;
s/\@noindent\s*//g;
s/\@refill//g;
s/\@gol//g;

View File

@ -778,24 +778,6 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
prefetch_motion(h, 1, pixel_shift, chroma444);
}
#define hl_motion_fn(sh, bits) \
static av_always_inline void hl_motion_ ## bits(H264Context *h, \
uint8_t *dest_y, \
uint8_t *dest_cb, uint8_t *dest_cr, \
qpel_mc_func (*qpix_put)[16], \
h264_chroma_mc_func (*chroma_put), \
qpel_mc_func (*qpix_avg)[16], \
h264_chroma_mc_func (*chroma_avg), \
h264_weight_func *weight_op, \
h264_biweight_func *weight_avg, \
int chroma444) \
{ \
hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \
}
hl_motion_fn(0, 8);
hl_motion_fn(1, 16);
static void free_tables(H264Context *h, int free_rbsp){
int i;
H264Context *hx;
@ -1443,7 +1425,7 @@ static void decode_postinit(H264Context *h, int setup_finished){
ff_thread_finish_setup(s->avctx);
}
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
MpegEncContext * const s = &h->s;
uint8_t *top_border;
int top_idx = 1;
@ -1518,7 +1500,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
}
}
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
uint8_t *src_cb, uint8_t *src_cr,
int linesize, int uvlinesize,
int xchg, int chroma444,
@ -1876,18 +1858,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
if(h->deblocking_filter)
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
}else if(is_h264){
if (pixel_shift) {
hl_motion_16(h, dest_y, dest_cb, dest_cr,
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab, 0);
} else
hl_motion_8(h, dest_y, dest_cb, dest_cr,
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab, 0);
hl_motion(h, dest_y, dest_cb, dest_cr,
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 0);
}
hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
@ -2020,18 +1995,11 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simpl
if(h->deblocking_filter)
xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
}else{
if (pixel_shift) {
hl_motion_16(h, dest[0], dest[1], dest[2],
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab, 1);
} else
hl_motion_8(h, dest[0], dest[1], dest[2],
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab, 1);
hl_motion(h, dest[0], dest[1], dest[2],
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 1);
}
for (p = 0; p < plane_count; p++)
@ -2966,7 +2934,9 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
}
}
}
h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
h->qp_thresh = 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset)
- FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1])
+ 6 * (h->sps.bit_depth_luma - 8);
#if 0 //FMO
if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)

View File

@ -766,11 +766,11 @@ static av_always_inline uint16_t pack8to16(int a, int b){
/**
* gets the chroma qp.
*/
static inline int get_chroma_qp(H264Context *h, int t, int qscale){
static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale){
return h->pps.chroma_qp_table[t][qscale];
}
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my);
static av_always_inline void pred_pskip_motion(H264Context * const h);
static void fill_decode_neighbors(H264Context *h, int mb_type){
MpegEncContext * const s = &h->s;
@ -1327,14 +1327,10 @@ static void av_unused decode_mb_skip(H264Context *h){
}
else
{
int mx, my;
mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
fill_decode_neighbors(h, mb_type);
fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ...
pred_pskip_motion(h, &mx, &my);
fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
pred_pskip_motion(h);
}
write_back_motion(h, mb_type);

View File

@ -1819,8 +1819,7 @@ static av_always_inline void decode_cabac_luma_residual( H264Context *h, const u
}
}
} else {
uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+16*p] ];
nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
fill_rectangle(&h->non_zero_count_cache[scan8[4*i8x8+16*p]], 2, 2, 8, 0, 1);
}
}
}

View File

@ -216,7 +216,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
MpegEncContext * const s = &h->s;
int mb_xy;
int mb_type, left_type, top_type;
int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
int qp, qp0, qp1, qpc, qpc0, qpc1;
int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
int chroma444 = CHROMA444;
@ -241,10 +241,6 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
qp1 = (qp + qp1 + 1) >> 1;
qpc0 = (qpc + qpc0 + 1) >> 1;
qpc1 = (qpc + qpc1 + 1) >> 1;
qp_thresh = 15+52 - h->slice_alpha_c0_offset;
if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
return;
if( IS_INTRA(mb_type) ) {
static const int16_t bS4[4] = {4,4,4,4};
@ -321,7 +317,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
} else {
LOCAL_ALIGNED_8(int16_t, bS, [2], [4][4]);
int edges;
if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 && !chroma444 ) {
edges = 4;
AV_WN64A(bS[0][0], 0x0002000200020002ULL);
AV_WN64A(bS[0][2], 0x0002000200020002ULL);

View File

@ -35,7 +35,7 @@
//#undef NDEBUG
#include <assert.h>
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
MpegEncContext *s = &h->s;
@ -92,7 +92,7 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
* @param mx the x component of the predicted motion vector
* @param my the y component of the predicted motion vector
*/
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
static av_always_inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
const int index8= scan8[n];
const int top_ref= h->ref_cache[list][ index8 - 8 ];
const int left_ref= h->ref_cache[list][ index8 - 1 ];
@ -147,7 +147,7 @@ static inline void pred_motion(H264Context * const h, int n, int part_width, int
* @param mx the x component of the predicted motion vector
* @param my the y component of the predicted motion vector
*/
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
if(n==0){
const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
@ -182,7 +182,7 @@ static inline void pred_16x8_motion(H264Context * const h, int n, int list, int
* @param mx the x component of the predicted motion vector
* @param my the y component of the predicted motion vector
*/
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
if(n==0){
const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
@ -213,22 +213,117 @@ static inline void pred_8x16_motion(H264Context * const h, int n, int list, int
pred_motion(h, n, 2, list, ref, mx, my);
}
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
/**
 * Adapt a neighbouring block's reference index and motion vector to the
 * frame/field mode of the current macroblock.
 *
 * Does nothing unless FRAME_MBAFF is true. Otherwise:
 *  - current MB is a field MB (MB_FIELD) and the neighbour is not interlaced:
 *    the reference index is doubled and the vertical MV component is halved;
 *  - current MB is a frame MB and the neighbour is interlaced:
 *    the reference index is halved and the vertical MV component is doubled.
 *
 * The neighbour's vector is never modified in place: it is copied into
 * mvbuf[idx] (a local of the expanding function) and mvn is redirected there.
 *
 * @param type  macroblock type of the neighbour
 * @param refn  lvalue holding the neighbour's reference index
 * @param mvn   lvalue pointer to the neighbour's motion vector (x, y)
 * @param idx   slot of mvbuf to use for the adjusted copy
 *
 * The vertical component is scaled with "*= 2" rather than "<<= 1" because
 * motion vectors may be negative and left-shifting a negative value is
 * undefined behaviour in C.
 */
#define FIX_MV_MBAFF(type, refn, mvn, idx)\
    if(FRAME_MBAFF){\
        if(MB_FIELD){\
            if(!IS_INTERLACED(type)){\
                refn <<= 1;\
                AV_COPY32(mvbuf[idx], mvn);\
                mvbuf[idx][1] /= 2;\
                mvn = mvbuf[idx];\
            }\
        }else{\
            if(IS_INTERLACED(type)){\
                refn >>= 1;\
                AV_COPY32(mvbuf[idx], mvn);\
                mvbuf[idx][1] *= 2;\
                mvn = mvbuf[idx];\
            }\
        }\
    }
static av_always_inline void pred_pskip_motion(H264Context * const h){
DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = {0};
DECLARE_ALIGNED(4, int16_t, mvbuf)[3][2];
MpegEncContext * const s = &h->s;
int8_t *ref = s->current_picture.ref_index[0];
int16_t (*mv)[2] = s->current_picture.motion_val[0];
int top_ref, left_ref, diagonal_ref, match_count, mx, my;
const int16_t *A, *B, *C;
int b_stride = h->b_stride;
fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
/* To avoid doing an entire fill_decode_caches, we inline the relevant parts here.
* FIXME: this is a partial duplicate of the logic in fill_decode_caches, but it's
* faster this way. Is there a way to avoid this duplication?
*/
if(USES_LIST(h->left_type[LTOP], 0)){
left_ref = ref[4*h->left_mb_xy[LTOP] + 1 + (h->left_block[0]&~1)];
A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride*h->left_block[0]];
FIX_MV_MBAFF(h->left_type[LTOP], left_ref, A, 0);
if(!(left_ref | AV_RN32A(A))){
goto zeromv;
}
}else if(h->left_type[LTOP]){
left_ref = LIST_NOT_USED;
A = zeromv;
}else{
goto zeromv;
}
if(USES_LIST(h->top_type, 0)){
top_ref = ref[4*h->top_mb_xy + 2];
B = mv[h->mb2b_xy[h->top_mb_xy] + 3*b_stride];
FIX_MV_MBAFF(h->top_type, top_ref, B, 1);
if(!(top_ref | AV_RN32A(B))){
goto zeromv;
}
}else if(h->top_type){
top_ref = LIST_NOT_USED;
B = zeromv;
}else{
goto zeromv;
}
tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
|| !( top_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 8 ]))
|| !(left_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 1 ]))){
*mx = *my = 0;
return;
if(USES_LIST(h->topright_type, 0)){
diagonal_ref = ref[4*h->topright_mb_xy + 2];
C = mv[h->mb2b_xy[h->topright_mb_xy] + 3*b_stride];
FIX_MV_MBAFF(h->topright_type, diagonal_ref, C, 2);
}else if(h->topright_type){
diagonal_ref = LIST_NOT_USED;
C = zeromv;
}else{
if(USES_LIST(h->topleft_type, 0)){
diagonal_ref = ref[4*h->topleft_mb_xy + 1 + (h->topleft_partition & 2)];
C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride)];
FIX_MV_MBAFF(h->topleft_type, diagonal_ref, C, 2);
}else if(h->topleft_type){
diagonal_ref = LIST_NOT_USED;
C = zeromv;
}else{
diagonal_ref = PART_NOT_AVAILABLE;
C = zeromv;
}
}
pred_motion(h, 0, 4, 0, 0, mx, my);
match_count= !diagonal_ref + !top_ref + !left_ref;
tprintf(h->s.avctx, "pred_pskip_motion match_count=%d\n", match_count);
if(match_count > 1){
mx = mid_pred(A[0], B[0], C[0]);
my = mid_pred(A[1], B[1], C[1]);
}else if(match_count==1){
if(!left_ref){
mx = A[0];
my = A[1];
}else if(!top_ref){
mx = B[0];
my = B[1];
}else{
mx = C[0];
my = C[1];
}
}else{
mx = mid_pred(A[0], B[0], C[0]);
my = mid_pred(A[1], B[1], C[1]);
}
fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
return;
zeromv:
fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
return;
}

View File

@ -750,7 +750,7 @@ static int xvid_ff_2pass_before(struct xvid_context *ref,
static int xvid_ff_2pass_after(struct xvid_context *ref,
xvid_plg_data_t *param) {
char *log = ref->twopassbuffer;
char *frame_types = " ipbs";
const char *frame_types = " ipbs";
char frame_type;
/* Quick bounds check */

View File

@ -575,7 +575,11 @@ void MPV_decode_defaults(MpegEncContext *s){
*/
av_cold int MPV_common_init(MpegEncContext *s)
{
int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y, threads;
int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y,
threads = (s->encoding ||
(HAVE_THREADS &&
s->avctx->active_thread_type & FF_THREAD_SLICE)) ?
s->avctx->thread_count : 1;
if(s->codec_id == CODEC_ID_MPEG2VIDEO && !s->progressive_sequence)
s->mb_height = (s->height + 31) / 32 * 2;
@ -589,8 +593,10 @@ av_cold int MPV_common_init(MpegEncContext *s)
if((s->encoding || (s->avctx->active_thread_type & FF_THREAD_SLICE)) &&
(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height))){
av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
return -1;
int max_threads = FFMIN(MAX_THREADS, s->mb_height);
av_log(s->avctx, AV_LOG_WARNING, "too many threads (%d), reducing to %d\n",
s->avctx->thread_count, max_threads);
threads = max_threads;
}
if((s->width || s->height) && av_image_check_size(s->width, s->height, 0, s->avctx))
@ -747,8 +753,6 @@ av_cold int MPV_common_init(MpegEncContext *s)
s->thread_context[0]= s;
if (s->encoding || (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_SLICE)) {
threads = s->avctx->thread_count;
for(i=1; i<threads; i++){
s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
memcpy(s->thread_context[i], s, sizeof(MpegEncContext));

View File

@ -42,7 +42,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =
{0x8000000080000000ULL, 0x8000000080000000ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_1 ) = 0x0001000100010001ULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1 ) = {0x0001000100010001ULL, 0x0001000100010001ULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_2 ) = {0x0002000200020002ULL, 0x0002000200020002ULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_3 ) = {0x0003000300030003ULL, 0x0003000300030003ULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4 ) = {0x0004000400040004ULL, 0x0004000400040004ULL};

View File

@ -29,9 +29,13 @@ SECTION_RODATA
SECTION .text
cextern pw_8
cextern pw_4
cextern pw_2
cextern pw_1
; dest, left, right, src
; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
%macro PRED4x4_LOWPASS 4
paddw %2, %3
psrlw %2, 1
@ -335,3 +339,930 @@ cglobal pred8x8_horizontal_10_sse2, 2,3
dec r2
jg .loop
REP_RET
;-----------------------------------------------------------------------------
; void pred8x8_dc(pixel *src, int stride)
;-----------------------------------------------------------------------------
; MOV8 dst, a [, b]
; Store one row at address expression dst.
;   mmsize == 8 : two 8-byte stores, a to [dst+0] and b to [dst+8]
;   otherwise   : a single movdqa of a to [dst]; b is ignored
; dst is spliced textually into the brackets, so callers pass it unbracketed
; (e.g. "r0+r1*2").
%macro MOV8 2-3
; sort of a hack, but it works
%if mmsize==8
movq [%1+0], %2
movq [%1+8], %3
%else
movdqa [%1], %2
%endif
%endmacro
; PRED8x8_DC cpu_suffix, word_shuffle_insn
; Emits pred8x8_dc_10_<cpu_suffix>. r0 = src, r1 = stride (per the prototype
; comment above). %2 is the word-shuffle instruction for the register width
; (pshufw for MMX, pshuflw for SSE2).
;
; Four partial sums are built, named s0..s3 in the inline comments:
;   s0, s1 : the two halves (4 pixels each) of the row above the block
;   s2, s3 : the left-neighbour pixels of rows 1-4 and rows 5-8 (counted from
;            the row above), read one word at a time at offset -2
; They are combined and rounded into four values, which are broadcast and
; stored: rows 1-4 receive m1/m2, rows 5-8 receive m3/m4.
%macro PRED8x8_DC 2
cglobal pred8x8_dc_10_%1, 2,4
; t0 is scratch storage for the original row pointer: r10 on x86-64,
; otherwise the r0m argument slot.
%ifdef ARCH_X86_64
%define t0 r10
%else
%define t0 r0m
%endif
; r0 now points at the row above the block
sub r0, r1
; m4 = 0, used below as the second operand of the rounding pavgw
pxor m4, m4
movq m0, [r0+0]
movq m1, [r0+8]
; NOTE(review): HADDW is defined outside this view; presumably a horizontal
; add of the words in the first operand, second operand as scratch -- confirm.
HADDW m0, m2
mov t0, r0
HADDW m1, m2
; scalar sum of the left neighbours of rows 1..4 -> r2d
movzx r2d, word [r0+r1*1-2]
movzx r3d, word [r0+r1*2-2]
lea r0, [r0+r1*2]
add r2d, r3d
movzx r3d, word [r0+r1*1-2]
add r2d, r3d
movzx r3d, word [r0+r1*2-2]
add r2d, r3d
lea r0, [r0+r1*2]
movd m2, r2d ; s2
; scalar sum of the left neighbours of rows 5..8 -> r2d
movzx r2d, word [r0+r1*1-2]
movzx r3d, word [r0+r1*2-2]
lea r0, [r0+r1*2]
add r2d, r3d
movzx r3d, word [r0+r1*1-2]
add r2d, r3d
movzx r3d, word [r0+r1*2-2]
add r2d, r3d
movd m3, r2d ; s3
punpcklwd m0, m1
; restore r0 to the row above the block
mov r0, t0
punpcklwd m2, m3
punpckldq m0, m2 ; s0, s1, s2, s3
%2 m3, m0, 11110110b ; s2, s1, s3, s3
; r2 = 3*stride
lea r2, [r1+r1*2]
%2 m0, m0, 01110100b ; s0, s1, s3, s1
paddw m0, m3
; r3 = row 4, base address for the lower four stores
lea r3, [r0+r1*4]
psrlw m0, 2
pavgw m0, m4 ; s0+s2, s1, s3, s1+s3
; broadcast the four DC words into full-width row registers
%ifidn %1, sse2
punpcklwd m0, m0
pshufd m3, m0, 11111010b
punpckldq m0, m0
SWAP 0,1
%else
pshufw m1, m0, 0x00
pshufw m2, m0, 0x55
pshufw m3, m0, 0xaa
pshufw m4, m0, 0xff
%endif
; upper four rows
MOV8 r0+r1*1, m1, m2
MOV8 r0+r1*2, m1, m2
MOV8 r0+r2*1, m1, m2
MOV8 r0+r1*4, m1, m2
; lower four rows
MOV8 r3+r1*1, m3, m4
MOV8 r3+r1*2, m3, m4
MOV8 r3+r2*1, m3, m4
MOV8 r3+r1*4, m3, m4
RET
%endmacro
INIT_MMX
PRED8x8_DC mmxext, pshufw
INIT_XMM
PRED8x8_DC sse2 , pshuflw
;-----------------------------------------------------------------------------
; void pred8x8_top_dc(pixel *src, int stride)
;-----------------------------------------------------------------------------
; PRED8x8_TOP_DC cpu_suffix, word_shuffle_insn
; Emits pred8x8_top_dc_10_<cpu_suffix>. r0 = src, r1 = stride (per the
; prototype comment above). Only the row above the block is read: each half
; (4 pixels) is summed, rounded with (sum + 2) >> 2, and the result is
; written to the matching half of all eight rows.
%macro PRED8x8_TOP_DC 2
cglobal pred8x8_top_dc_10_%1, 2,4
; r0 now points at the row above the block
sub r0, r1
movq m0, [r0+0]
movq m1, [r0+8]
; NOTE(review): HADDW is defined outside this view; presumably a horizontal
; add of the words in the first operand, second operand as scratch -- confirm.
HADDW m0, m2
HADDW m1, m3
; r2 = 3*stride
lea r2, [r1+r1*2]
paddw m0, [pw_2]
paddw m1, [pw_2]
; r3 = row 4, base address for the lower four stores
lea r3, [r0+r1*4]
psrlw m0, 2
psrlw m1, 2
; broadcast word 0 across the low four words
%2 m0, m0, 0
%2 m1, m1, 0
; SSE2: merge both halves into one 16-byte row held in m0
%ifidn %1, sse2
punpcklqdq m0, m1
%endif
MOV8 r0+r1*1, m0, m1
MOV8 r0+r1*2, m0, m1
MOV8 r0+r2*1, m0, m1
MOV8 r0+r1*4, m0, m1
MOV8 r3+r1*1, m0, m1
MOV8 r3+r1*2, m0, m1
MOV8 r3+r2*1, m0, m1
MOV8 r3+r1*4, m0, m1
RET
%endmacro
INIT_MMX
PRED8x8_TOP_DC mmxext, pshufw
INIT_XMM
PRED8x8_TOP_DC sse2 , pshuflw
;-----------------------------------------------------------------------------
; void pred8x8l_top_dc(pixel *src, int has_topleft, int has_topright, int stride)
;-----------------------------------------------------------------------------
; PRED8x8L_TOP_DC cpu_suffix
; Emits pred8x8l_top_dc_10_<cpu_suffix>.
; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride (per the
; prototype comment above).
;
; The row above the block is low-pass filtered together with its left and
; right neighbours, summed, rounded with (sum + 4) >> 3 and broadcast to all
; eight rows. A missing top-left / top-right neighbour is replaced by the
; first / last pixel of the top row before filtering.
;
; Only m0-m5 are used, matching the 6 XMM registers declared to cglobal. The
; former "pxor m7, m7" was removed: its result was never read and it wrote a
; register beyond the declared count.
%macro PRED8x8L_TOP_DC 1
cglobal pred8x8l_top_dc_10_%1, 4,4,6
    sub         r0, r3              ; r0 -> row above the block
    mova        m0, [r0-16]
    mova        m3, [r0]            ; m3 = top row
    mova        m1, [r0+16]
    mova        m2, m3
    mova        m4, m3
    PALIGNR     m2, m0, 14, m0      ; m2 = top row shifted by one pixel (left neighbours)
    PALIGNR     m1, m4, 2, m4       ; m1 = top row shifted the other way (right neighbours)
    test        r1, r1 ; top_left
    jz .fix_lt_2
    test        r2, r2 ; top_right
    jz .fix_tr_1
    jmp .body
.fix_lt_2:
    ; replace the lowest word of m2 with the lowest word of m3
    mova        m5, m3
    pxor        m5, m2
    pslldq      m5, 14
    psrldq      m5, 14
    pxor        m2, m5
    test        r2, r2 ; top_right
    jnz .body
.fix_tr_1:
    ; replace the highest word of m1 with the highest word of m3
    mova        m5, m3
    pxor        m5, m1
    psrldq      m5, 14
    pslldq      m5, 14
    pxor        m1, m5
.body:
    lea         r1, [r3+r3*2]       ; r1 = 3*stride
    lea         r2, [r0+r3*4]       ; r2 -> row 4, base for the lower stores
    PRED4x4_LOWPASS m0, m2, m1, m3
    HADDW       m0, m1
    paddw       m0, [pw_4]
    psrlw       m0, 3
    SPLATW      m0, m0, 0
    mova [r0+r3*1], m0
    mova [r0+r3*2], m0
    mova [r0+r1*1], m0
    mova [r0+r3*4], m0
    mova [r2+r3*1], m0
    mova [r2+r3*2], m0
    mova [r2+r1*1], m0
    mova [r2+r3*4], m0
    RET
%endmacro
INIT_XMM
%define PALIGNR PALIGNR_MMX
PRED8x8L_TOP_DC sse2
%define PALIGNR PALIGNR_SSSE3
PRED8x8L_TOP_DC ssse3
;-----------------------------------------------------------------------------
;void pred8x8l_dc(pixel *src, int has_topleft, int has_topright, int stride)
;-----------------------------------------------------------------------------
;TODO: see if scalar is faster
; PRED8x8L_DC cpu_suffix
; Emits pred8x8l_dc_10_<cpu_suffix>.
; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride (per the
; prototype comment above).
;
; The left column is gathered from the last word of the 16 bytes preceding
; each row, low-pass filtered and kept in m7; the top row is filtered into
; m6. Both are summed, rounded with (sum + 8) >> 4 and broadcast to all
; eight rows.
;
; Local labels carry an explicit colon so the assembler does not flag them
; as orphan labels.
%macro PRED8x8L_DC 1
cglobal pred8x8l_dc_10_%1, 4,5,8
    sub         r0, r3              ; r0 -> row above the block
    lea         r4, [r0+r3*2]
    ; gather the left-neighbour column into m3
    mova        m0, [r0+r3*1-16]
    punpckhwd   m0, [r0+r3*0-16]
    mova        m1, [r4+r3*1-16]
    punpckhwd   m1, [r0+r3*2-16]
    mov         r4, r0              ; r4 keeps the row-above pointer
    punpckhdq   m1, m0
    lea         r0, [r0+r3*4]
    mova        m2, [r0+r3*1-16]
    punpckhwd   m2, [r0+r3*0-16]
    lea         r0, [r0+r3*2]
    mova        m3, [r0+r3*1-16]
    punpckhwd   m3, [r0+r3*0-16]
    punpckhdq   m3, m2
    punpckhqdq  m3, m1
    lea         r0, [r0+r3*2]
    mova        m0, [r0+r3*0-16]
    mova        m1, [r4]
    mov         r0, r4              ; restore r0
    mova        m4, m3
    mova        m2, m3
    PALIGNR     m4, m0, 14, m0
    PALIGNR     m1, m2, 2, m2
    test        r1, r1
    jnz .do_left
.fix_lt_1:
    ; no top-left neighbour: patch m1 before filtering the left column
    mova        m5, m3
    pxor        m5, m4
    psrldq      m5, 14
    pslldq      m5, 12
    pxor        m1, m5
    jmp .do_left
.fix_lt_2:
    ; replace the lowest word of m2 with the lowest word of m3
    mova        m5, m3
    pxor        m5, m2
    pslldq      m5, 14
    psrldq      m5, 14
    pxor        m2, m5
    test        r2, r2
    jnz .body
.fix_tr_1:
    ; replace the highest word of m1 with the highest word of m3
    mova        m5, m3
    pxor        m5, m1
    psrldq      m5, 14
    pslldq      m5, 14
    pxor        m1, m5
    jmp .body
.do_left:
    mova        m0, m4
    PRED4x4_LOWPASS m2, m1, m4, m3
    mova        m4, m0
    mova        m7, m2
    PRED4x4_LOWPASS m1, m3, m0, m4
    pslldq      m1, 14
    PALIGNR     m7, m1, 14, m3      ; m7 = filtered left column
    ; load the top row and its neighbours
    mova        m0, [r0-16]
    mova        m3, [r0]
    mova        m1, [r0+16]
    mova        m2, m3
    mova        m4, m3
    PALIGNR     m2, m0, 14, m0
    PALIGNR     m1, m4, 2, m4
    test        r1, r1
    jz .fix_lt_2
    test        r2, r2
    jz .fix_tr_1
.body:
    lea         r1, [r3+r3*2]       ; r1 = 3*stride
    PRED4x4_LOWPASS m6, m2, m1, m3  ; m6 = filtered top row
    HADDW       m7, m0
    HADDW       m6, m0
    lea         r2, [r0+r3*4]       ; r2 -> row 4, base for the lower stores
    paddw       m7, [pw_8]
    paddw       m7, m6
    psrlw       m7, 4
    SPLATW      m7, m7
    mova [r0+r3*1], m7
    mova [r0+r3*2], m7
    mova [r0+r1*1], m7
    mova [r0+r3*4], m7
    mova [r2+r3*1], m7
    mova [r2+r3*2], m7
    mova [r2+r1*1], m7
    mova [r2+r3*4], m7
    RET
%endmacro
INIT_XMM
%define PALIGNR PALIGNR_MMX
PRED8x8L_DC sse2
%define PALIGNR PALIGNR_SSSE3
PRED8x8L_DC ssse3
;-----------------------------------------------------------------------------
; void pred8x8l_vertical(pixel *src, int has_topleft, int has_topright, int stride)
;-----------------------------------------------------------------------------
; PRED8x8L_VERTICAL cpu_suffix
; Emits pred8x8l_vertical_10_<cpu_suffix>.
; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride (per the
; prototype comment above).
;
; The row above the block is low-pass filtered together with its left and
; right neighbours and the filtered row is copied to all eight rows. A
; missing top-left / top-right neighbour is replaced by the first / last
; pixel of the top row before filtering.
;
; Local labels carry an explicit colon so the assembler does not flag them
; as orphan labels.
%macro PRED8x8L_VERTICAL 1
cglobal pred8x8l_vertical_10_%1, 4,4,6
    sub         r0, r3              ; r0 -> row above the block
    mova        m0, [r0-16]
    mova        m3, [r0]            ; m3 = top row
    mova        m1, [r0+16]
    mova        m2, m3
    mova        m4, m3
    PALIGNR     m2, m0, 14, m0
    PALIGNR     m1, m4, 2, m4
    test        r1, r1 ; top_left
    jz .fix_lt_2
    test        r2, r2 ; top_right
    jz .fix_tr_1
    jmp .body
.fix_lt_2:
    ; replace the lowest word of m2 with the lowest word of m3
    mova        m5, m3
    pxor        m5, m2
    pslldq      m5, 14
    psrldq      m5, 14
    pxor        m2, m5
    test        r2, r2 ; top_right
    jnz .body
.fix_tr_1:
    ; replace the highest word of m1 with the highest word of m3
    mova        m5, m3
    pxor        m5, m1
    psrldq      m5, 14
    pslldq      m5, 14
    pxor        m1, m5
.body:
    lea         r1, [r3+r3*2]       ; r1 = 3*stride
    lea         r2, [r0+r3*4]       ; r2 -> row 4, base for the lower stores
    PRED4x4_LOWPASS m0, m2, m1, m3
    mova [r0+r3*1], m0
    mova [r0+r3*2], m0
    mova [r0+r1*1], m0
    mova [r0+r3*4], m0
    mova [r2+r3*1], m0
    mova [r2+r3*2], m0
    mova [r2+r1*1], m0
    mova [r2+r3*4], m0
    RET
%endmacro
INIT_XMM
%define PALIGNR PALIGNR_MMX
PRED8x8L_VERTICAL sse2
%define PALIGNR PALIGNR_SSSE3
PRED8x8L_VERTICAL ssse3
;-----------------------------------------------------------------------------
; void pred8x8l_horizontal(pixel *src, int has_topleft, int has_topright, int stride)
;-----------------------------------------------------------------------------
; PRED8x8L_HORIZONTAL cpu_suffix
; Emits pred8x8l_horizontal_10_<cpu_suffix>.
; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride (per the
; prototype comment above). has_topright is never tested; r2 is reused as
; scratch.
;
; The left-neighbour column is gathered from the last word of the 16 bytes
; preceding each row, low-pass filtered, and each filtered value is broadcast
; across one output row.
%macro PRED8x8L_HORIZONTAL 1
cglobal pred8x8l_horizontal_10_%1, 4,4,8
; r0 now points at the row above the block
sub r0, r3
lea r2, [r0+r3*2]
mova m0, [r0+r3*1-16]
; r1 = row above when has_topleft != 0, otherwise the first block row, so the
; top-left sample falls back to the first left neighbour
test r1, r1
lea r1, [r0+r3]
cmovnz r1, r0
punpckhwd m0, [r1+r3*0-16]
mova m1, [r2+r3*1-16]
punpckhwd m1, [r0+r3*2-16]
; r2 keeps the row-above pointer while r0 walks down the block
mov r2, r0
punpckhdq m1, m0
lea r0, [r0+r3*4]
mova m2, [r0+r3*1-16]
punpckhwd m2, [r0+r3*0-16]
lea r0, [r0+r3*2]
mova m3, [r0+r3*1-16]
punpckhwd m3, [r0+r3*0-16]
punpckhdq m3, m2
punpckhqdq m3, m1
lea r0, [r0+r3*2]
mova m0, [r0+r3*0-16]
mova m1, [r1+r3*0-16]
; restore r0 to the row above the block
mov r0, r2
mova m4, m3
mova m2, m3
PALIGNR m4, m0, 14, m0
PALIGNR m1, m2, 2, m2
; two low-pass passes; the combined result ends up in m7
mova m0, m4
PRED4x4_LOWPASS m2, m1, m4, m3
mova m4, m0
mova m7, m2
PRED4x4_LOWPASS m1, m3, m0, m4
pslldq m1, 14
PALIGNR m7, m1, 14, m3
; r1 = 3*stride
lea r1, [r3+r3*2]
; duplicate every word, then broadcast one dword per output row
punpckhwd m3, m7, m7
punpcklwd m7, m7
pshufd m0, m3, 0xff
pshufd m1, m3, 0xaa
; r2 = row 4, base address for the lower four stores
lea r2, [r0+r3*4]
pshufd m2, m3, 0x55
pshufd m3, m3, 0x00
pshufd m4, m7, 0xff
pshufd m5, m7, 0xaa
pshufd m6, m7, 0x55
pshufd m7, m7, 0x00
mova [r0+r3*1], m0
mova [r0+r3*2], m1
mova [r0+r1*1], m2
mova [r0+r3*4], m3
mova [r2+r3*1], m4
mova [r2+r3*2], m5
mova [r2+r1*1], m6
mova [r2+r3*4], m7
RET
%endmacro
INIT_XMM
%define PALIGNR PALIGNR_MMX
PRED8x8L_HORIZONTAL sse2
%define PALIGNR PALIGNR_SSSE3
PRED8x8L_HORIZONTAL ssse3
;-----------------------------------------------------------------------------
;void pred8x8l_down_left(pixel *src, int has_topleft, int has_topright, int stride)
;-----------------------------------------------------------------------------
; PRED8x8L_DOWN_LEFT cpu_suffix
; Emits pred8x8l_down_left_10_<cpu_suffix>.
; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride (per the
; prototype comment above).
;
; Flow:
;   1. load the top row with its neighbours and patch missing edge samples
;      (.fix_lt_2 / .fix_tr_1)
;   2. .do_top: low-pass filter the top row into m7
;   3. build the top-right row in m1: filtered from [r0+16] when has_topright
;      is set, otherwise (.fix_tr_2) the last top pixel replicated
;   4. .do_topright: filter the 16-sample top/top-right sequence and store the
;      eight rows from the bottom row upwards, moving one pixel (2 bytes)
;      along the sequence per row
%macro PRED8x8L_DOWN_LEFT 1
cglobal pred8x8l_down_left_10_%1, 4,4,8
; r0 now points at the row above the block
sub r0, r3
mova m0, [r0-16]
mova m3, [r0]
mova m1, [r0+16]
mova m2, m3
mova m4, m3
PALIGNR m2, m0, 14, m0
PALIGNR m1, m4, 2, m4
test r1, r1
jz .fix_lt_2
test r2, r2
jz .fix_tr_1
jmp .do_top
.fix_lt_2:
; replace the lowest word of m2 with the lowest word of m3
mova m5, m3
pxor m5, m2
pslldq m5, 14
psrldq m5, 14
pxor m2, m5
test r2, r2
jnz .do_top
.fix_tr_1:
; replace the highest word of m1 with the highest word of m3
mova m5, m3
pxor m5, m1
psrldq m5, 14
pslldq m5, 14
pxor m1, m5
jmp .do_top
.fix_tr_2:
; no top-right block: fill m1 with the last pixel of the top row
punpckhwd m3, m3
pshufd m1, m3, 0xFF
jmp .do_topright
.do_top:
PRED4x4_LOWPASS m4, m2, m1, m3
mova m7, m4
test r2, r2
jz .fix_tr_2
; top-right block present: load and filter it into m1
mova m0, [r0+16]
mova m5, m0
mova m2, m0
mova m4, m0
psrldq m5, 14
PALIGNR m2, m3, 14, m3
PALIGNR m5, m4, 2, m4
PRED4x4_LOWPASS m1, m2, m5, m0
.do_topright:
; r1 = 3*stride
lea r1, [r3+r3*2]
mova m6, m1
psrldq m1, 14
mova m4, m1
; r2 = row 4, base address for the lower four stores
lea r2, [r0+r3*4]
mova m2, m6
PALIGNR m2, m7, 2, m0
mova m3, m6
PALIGNR m3, m7, 14, m0
PALIGNR m4, m6, 2, m0
mova m5, m7
mova m1, m7
mova m7, m6
pslldq m1, 2
PRED4x4_LOWPASS m0, m1, m2, m5
PRED4x4_LOWPASS m1, m3, m4, m7
; bottom row first; for each following row shift m1 up by one word and feed
; in the top word of m0
mova [r2+r3*4], m1
mova m2, m0
pslldq m1, 2
psrldq m2, 14
pslldq m0, 2
por m1, m2
mova [r2+r1*1], m1
mova m2, m0
pslldq m1, 2
psrldq m2, 14
pslldq m0, 2
por m1, m2
mova [r2+r3*2], m1
mova m2, m0
pslldq m1, 2
psrldq m2, 14
pslldq m0, 2
por m1, m2
mova [r2+r3*1], m1
mova m2, m0
pslldq m1, 2
psrldq m2, 14
pslldq m0, 2
por m1, m2
mova [r0+r3*4], m1
mova m2, m0
pslldq m1, 2
psrldq m2, 14
pslldq m0, 2
por m1, m2
mova [r0+r1*1], m1
mova m2, m0
pslldq m1, 2
psrldq m2, 14
pslldq m0, 2
por m1, m2
mova [r0+r3*2], m1
pslldq m1, 2
psrldq m0, 14
por m1, m0
mova [r0+r3*1], m1
RET
%endmacro
INIT_XMM
%define PALIGNR PALIGNR_MMX
PRED8x8L_DOWN_LEFT sse2
%define PALIGNR PALIGNR_SSSE3
PRED8x8L_DOWN_LEFT ssse3
;-----------------------------------------------------------------------------
;void pred8x8l_down_right(pixel *src, int has_topleft, int has_topright, int stride)
;-----------------------------------------------------------------------------
; PRED8x8L_DOWN_RIGHT cpu_suffix
; Emits pred8x8l_down_right_10_<cpu_suffix>.
; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride (per the
; prototype comment above).
;
; Flow:
;   1. gather the left-neighbour column (last word of the 16 bytes preceding
;      each row) and low-pass filter it (.do_left)
;   2. load the top row with its neighbours, patch missing edge samples and
;      filter it (.do_top)
;   3. .body: filter the combined left/top sequence and store the eight rows
;      from the bottom row upwards, moving one pixel (2 bytes) along the
;      sequence per row
;
; Local labels carry an explicit colon so the assembler does not flag them
; as orphan labels.
%macro PRED8x8L_DOWN_RIGHT 1
cglobal pred8x8l_down_right_10_%1, 4,5,8
    sub         r0, r3              ; r0 -> row above the block
    lea         r4, [r0+r3*2]
    ; gather the left-neighbour column into m3
    mova        m0, [r0+r3*1-16]
    punpckhwd   m0, [r0+r3*0-16]
    mova        m1, [r4+r3*1-16]
    punpckhwd   m1, [r0+r3*2-16]
    mov         r4, r0              ; r4 keeps the row-above pointer
    punpckhdq   m1, m0
    lea         r0, [r0+r3*4]
    mova        m2, [r0+r3*1-16]
    punpckhwd   m2, [r0+r3*0-16]
    lea         r0, [r0+r3*2]
    mova        m3, [r0+r3*1-16]
    punpckhwd   m3, [r0+r3*0-16]
    punpckhdq   m3, m2
    punpckhqdq  m3, m1
    lea         r0, [r0+r3*2]
    mova        m0, [r0+r3*0-16]
    mova        m1, [r4]
    mov         r0, r4              ; restore r0
    mova        m4, m3
    mova        m2, m3
    PALIGNR     m4, m0, 14, m0
    PALIGNR     m1, m2, 2, m2
    test        r1, r1 ; top_left
    jz .fix_lt_1
.do_left:
    mova        m0, m4
    PRED4x4_LOWPASS m2, m1, m4, m3
    mova        m4, m0
    mova        m7, m2
    mova        m6, m2
    PRED4x4_LOWPASS m1, m3, m0, m4
    pslldq      m1, 14
    PALIGNR     m7, m1, 14, m3
    ; load the top row and its neighbours
    mova        m0, [r0-16]
    mova        m3, [r0]
    mova        m1, [r0+16]
    mova        m2, m3
    mova        m4, m3
    PALIGNR     m2, m0, 14, m0
    PALIGNR     m1, m4, 2, m4
    test        r1, r1 ; top_left
    jz .fix_lt_2
    test        r2, r2 ; top_right
    jz .fix_tr_1
.do_top:
    PRED4x4_LOWPASS m4, m2, m1, m3
    mova        m5, m4
    jmp .body
.fix_lt_1:
    ; no top-left neighbour: patch m1 before filtering the left column
    mova        m5, m3
    pxor        m5, m4
    psrldq      m5, 14
    pslldq      m5, 12
    pxor        m1, m5
    jmp .do_left
.fix_lt_2:
    ; replace the lowest word of m2 with the lowest word of m3
    mova        m5, m3
    pxor        m5, m2
    pslldq      m5, 14
    psrldq      m5, 14
    pxor        m2, m5
    test        r2, r2 ; top_right
    jnz .do_top
.fix_tr_1:
    ; replace the highest word of m1 with the highest word of m3
    mova        m5, m3
    pxor        m5, m1
    psrldq      m5, 14
    pslldq      m5, 14
    pxor        m1, m5
    jmp .do_top
.body:
    lea         r1, [r3+r3*2]       ; r1 = 3*stride
    mova        m1, m7
    mova        m7, m5
    mova        m5, m6
    mova        m2, m7
    lea         r2, [r0+r3*4]       ; r2 -> row 4, base for the lower stores
    PALIGNR     m2, m6, 2, m0
    mova        m3, m7
    PALIGNR     m3, m6, 14, m0
    mova        m4, m7
    psrldq      m4, 2
    PRED4x4_LOWPASS m0, m1, m2, m5
    PRED4x4_LOWPASS m1, m3, m4, m7
    ; bottom row first; for each following row shift m0 down by one word and
    ; feed in the lowest word of m1 at the top
    mova [r2+r3*4], m0
    mova        m2, m1
    psrldq      m0, 2
    pslldq      m2, 14
    psrldq      m1, 2
    por         m0, m2
    mova [r2+r1*1], m0
    mova        m2, m1
    psrldq      m0, 2
    pslldq      m2, 14
    psrldq      m1, 2
    por         m0, m2
    mova [r2+r3*2], m0
    mova        m2, m1
    psrldq      m0, 2
    pslldq      m2, 14
    psrldq      m1, 2
    por         m0, m2
    mova [r2+r3*1], m0
    mova        m2, m1
    psrldq      m0, 2
    pslldq      m2, 14
    psrldq      m1, 2
    por         m0, m2
    mova [r0+r3*4], m0
    mova        m2, m1
    psrldq      m0, 2
    pslldq      m2, 14
    psrldq      m1, 2
    por         m0, m2
    mova [r0+r1*1], m0
    mova        m2, m1
    psrldq      m0, 2
    pslldq      m2, 14
    psrldq      m1, 2
    por         m0, m2
    mova [r0+r3*2], m0
    psrldq      m0, 2
    pslldq      m1, 14
    por         m0, m1
    mova [r0+r3*1], m0
    RET
%endmacro
INIT_XMM
%define PALIGNR PALIGNR_MMX
PRED8x8L_DOWN_RIGHT sse2
%define PALIGNR PALIGNR_SSSE3
PRED8x8L_DOWN_RIGHT ssse3
;-----------------------------------------------------------------------------
; void pred8x8l_vertical_right(pixel *src, int has_topleft, int has_topright, int stride)
;-----------------------------------------------------------------------------
; 8x8 luma vertical-right intra prediction on 16-bit pixels.
;   r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride in bytes,
;   r4 = scratch copy of the row pointer.
; Outline:
;   1. gather the left-neighbour column and low-pass filter it      (-> m7)
;   2. load the row above, patch the samples flagged as unavailable,
;      and low-pass filter it                                       (-> m6)
;   3. derive the eight output rows from m6/m7 and store them.
; PRED4x4_LOWPASS and PALIGNR are defined outside this macro (PALIGNR is bound
; to its MMX or SSSE3 flavour right before each instantiation below).
; NOTE(review): PRED4x4_LOWPASS is presumably the (a + 2*b + c + 2) >> 2
; filter taking (dst, a, c, b) -- confirm against its definition.
%macro PRED8x8L_VERTICAL_RIGHT 1
cglobal pred8x8l_vertical_right_10_%1, 4,5,8
    sub         r0, r3                  ; r0 -> row above the block
    lea         r4, [r0+r3*2]
    ; Each load ends at the pixel just left of a row, so the neighbour sits in
    ; the highest word; the punpckh* cascade collects those words into m3.
    mova        m0, [r0+r3*1-16]
    punpckhwd   m0, [r0+r3*0-16]
    mova        m1, [r4+r3*1-16]
    punpckhwd   m1, [r0+r3*2-16]
    mov         r4, r0                  ; remember the row above
    punpckhdq   m1, m0
    lea         r0, [r0+r3*4]
    mova        m2, [r0+r3*1-16]
    punpckhwd   m2, [r0+r3*0-16]
    lea         r0, [r0+r3*2]
    mova        m3, [r0+r3*1-16]
    punpckhwd   m3, [r0+r3*0-16]
    punpckhdq   m3, m2
    punpckhqdq  m3, m1                  ; m3 = lt l0 l1 l2 l3 l4 l5 l6 (high -> low word)
    lea         r0, [r0+r3*2]
    mova        m0, [r0+r3*0-16]        ; highest word = l7
    mova        m1, [r4]                ; row above the block
    mov         r0, r4
    mova        m4, m3
    mova        m2, m3
    PALIGNR     m4, m0, 14, m0          ; m4 = l0 l1 l2 l3 l4 l5 l6 l7
    PALIGNR     m1, m2,  2, m2          ; m1 = t0 lt l0 l1 l2 l3 l4 l5
    test        r1, r1
    jnz .do_left                        ; top-left present: no fix-up needed
.fix_lt_1:
    ; no top-left sample: overwrite the lt word of m1 with l0
    mova        m5, m3
    pxor        m5, m4
    psrldq      m5, 14
    pslldq      m5, 12
    pxor        m1, m5
    jmp .do_left
.fix_lt_2:
    ; no top-left sample: overwrite the lt word of m2 with t0
    mova        m5, m3
    pxor        m5, m2
    pslldq      m5, 14
    psrldq      m5, 14
    pxor        m2, m5
    test        r2, r2
    jnz .do_top
.fix_tr_1:
    ; no top-right sample: overwrite the t8 word of m1 with t7
    mova        m5, m3
    pxor        m5, m1
    psrldq      m5, 14
    pslldq      m5, 14
    pxor        m1, m5
    jmp .do_top
.do_left:
    mova        m0, m4
    PRED4x4_LOWPASS m2, m1, m4, m3
    mova        m7, m2                  ; m7 = filtered top-left/left samples
    mova        m0, [r0-16]             ; highest word = top-left sample
    mova        m3, [r0]                ; t0..t7
    mova        m1, [r0+16]             ; t8..t15
    mova        m2, m3
    mova        m4, m3
    PALIGNR     m2, m0, 14, m0          ; top row shifted towards the top-left
    PALIGNR     m1, m4,  2, m4          ; top row shifted towards the top-right
    test        r1, r1
    jz .fix_lt_2
    test        r2, r2
    jz .fix_tr_1
.do_top:
    PRED4x4_LOWPASS m6, m2, m1, m3      ; m6 = filtered top row
    lea         r1, [r3+r3*2]           ; flag args are dead: r1 = 3*stride
    mova        m2, m6
    mova        m3, m6
    PALIGNR     m3, m7, 14, m0
    PALIGNR     m6, m7, 12, m1
    mova        m4, m3
    pavgw       m3, m2
    lea         r2, [r0+r3*4]           ; r2 = r0 + 4*stride
    PRED4x4_LOWPASS m0, m6, m2, m4
    mova        [r0+r3*1], m3           ; row 0
    mova        [r0+r3*2], m0           ; row 1
    mova        m5, m0
    mova        m6, m3
    mova        m1, m7
    mova        m2, m1
    pslldq      m2, 2
    mova        m3, m1
    pslldq      m3, 4
    PRED4x4_LOWPASS m0, m1, m3, m2
    ; Rows 2..7: m6 and m5 alternate, each time taking one more word from m0.
    PALIGNR     m6, m0, 14, m2
    mova        [r0+r1*1], m6           ; row 2
    pslldq      m0, 2
    PALIGNR     m5, m0, 14, m1
    mova        [r0+r3*4], m5           ; row 3
    pslldq      m0, 2
    PALIGNR     m6, m0, 14, m2
    mova        [r2+r3*1], m6           ; row 4
    pslldq      m0, 2
    PALIGNR     m5, m0, 14, m1
    mova        [r2+r3*2], m5           ; row 5
    pslldq      m0, 2
    PALIGNR     m6, m0, 14, m2
    mova        [r2+r1*1], m6           ; row 6
    pslldq      m0, 2
    PALIGNR     m5, m0, 14, m1
    mova        [r2+r3*4], m5           ; row 7
    RET
%endmacro
INIT_XMM
%define PALIGNR PALIGNR_MMX
PRED8x8L_VERTICAL_RIGHT sse2
%define PALIGNR PALIGNR_SSSE3
PRED8x8L_VERTICAL_RIGHT ssse3
;-----------------------------------------------------------------------------
; void pred8x8l_horizontal_up(pixel *src, int has_topleft, int has_topright, int stride)
;-----------------------------------------------------------------------------
; 8x8 luma horizontal-up intra prediction on 16-bit pixels.
;   r0 = src, r1 = has_topleft, r3 = stride in bytes.
;   has_topright is never read; r2 is reused as a scratch pointer.
; Outline: gather and low-pass filter the left-neighbour column, reverse it,
; then interleave averaged and filtered samples to form the eight output rows.
; PRED4x4_LOWPASS and PALIGNR are defined outside this macro (PALIGNR is bound
; to its MMX or SSSE3 flavour right before each instantiation below).
%macro PRED8x8L_HORIZONTAL_UP 1
cglobal pred8x8l_horizontal_up_10_%1, 4,4,8
; r0 -> row above the block
sub r0, r3
lea r2, [r0+r3*2]
mova m0, [r0+r3*1-16]
; Pick the row that supplies the sample above l0: the row above the block
; when has_topleft is set, otherwise row 0 itself. lea leaves the flags
; produced by test untouched, so cmovnz still sees them.
test r1, r1
lea r1, [r0+r3]
cmovnz r1, r0
; Each load ends at the pixel just left of a row, so the neighbour sits in the
; highest word; the punpckh* cascade collects those words into m3.
punpckhwd m0, [r1+r3*0-16]
mova m1, [r2+r3*1-16]
punpckhwd m1, [r0+r3*2-16]
; keep the row-above pointer while r0 walks down the block
mov r2, r0
punpckhdq m1, m0
lea r0, [r0+r3*4]
mova m2, [r0+r3*1-16]
punpckhwd m2, [r0+r3*0-16]
lea r0, [r0+r3*2]
mova m3, [r0+r3*1-16]
punpckhwd m3, [r0+r3*0-16]
punpckhdq m3, m2
punpckhqdq m3, m1
lea r0, [r0+r3*2]
mova m0, [r0+r3*0-16]
mova m1, [r1+r3*0-16]
; restore r0 to the row above the block
mov r0, r2
mova m4, m3
mova m2, m3
PALIGNR m4, m0, 14, m0
PALIGNR m1, m2, 2, m2
; low-pass filter the gathered left column; the result is assembled in m7
mova m0, m4
PRED4x4_LOWPASS m2, m1, m4, m3
mova m4, m0
mova m7, m2
PRED4x4_LOWPASS m1, m3, m0, m4
pslldq m1, 14
PALIGNR m7, m1, 14, m3
; has_topleft is no longer needed: r1 = 3*stride
lea r1, [r3+r3*2]
; reverse the word order of m7: swap dwords, then swap the words in each dword
pshufd m0, m7, 00011011b ; l6 l7 l4 l5 l2 l3 l0 l1
pslldq m7, 14 ; l7 .. .. .. .. .. .. ..
mova m2, m0
pslld m0, 16
psrld m2, 16
por m2, m0 ; l7 l6 l5 l4 l3 l2 l1 l0
mova m3, m2
mova m4, m2
mova m5, m2
psrldq m2, 2
psrldq m3, 4
; r2 = r0 + 4*stride, base address for rows 4..7
lea r2, [r0+r3*4]
por m2, m7 ; l7 l7 l6 l5 l4 l3 l2 l1
punpckhwd m7, m7
por m3, m7 ; l7 l7 l7 l6 l5 l4 l3 l2
; m4 = average of neighbouring left samples, m1 = their 3-tap filtered values
pavgw m4, m2
PRED4x4_LOWPASS m1, m3, m5, m2
mova m5, m4
; interleave averaged and filtered samples into the prediction sequence
punpcklwd m4, m1 ; p4 p3 p2 p1
punpckhwd m5, m1 ; p8 p7 p6 p5
mova m6, m5
mova m7, m5
mova m0, m5
; each following row starts one dword further along the sequence; the pshufd
; rows replicate the top dword once the sequence runs out
PALIGNR m5, m4, 4, m1
pshufd m1, m6, 11111001b
PALIGNR m6, m4, 8, m2
pshufd m2, m7, 11111110b
PALIGNR m7, m4, 12, m3
pshufd m3, m0, 11111111b
; store rows 0..7 (r0 still points at the row above the block)
mova [r0+r3*1], m4
mova [r0+r3*2], m5
mova [r0+r1*1], m6
mova [r0+r3*4], m7
mova [r2+r3*1], m0
mova [r2+r3*2], m1
mova [r2+r1*1], m2
mova [r2+r3*4], m3
RET
%endmacro
INIT_XMM
%define PALIGNR PALIGNR_MMX
PRED8x8L_HORIZONTAL_UP sse2
%define PALIGNR PALIGNR_SSSE3
PRED8x8L_HORIZONTAL_UP ssse3
;-----------------------------------------------------------------------------
; void pred16x16_vertical(pixel *src, int stride)
;-----------------------------------------------------------------------------
; MOV16 dst, r0, r1 [, r2, r3]
; Stores 32 bytes at address expression dst: two registers when mmsize is 16,
; four when mmsize is 8. The last two register arguments are only used in the
; 8-byte case.
%macro MOV16 3-5
    mova [%1+0*mmsize], %2
    mova [%1+1*mmsize], %3
%if mmsize==8
    mova [%1+2*mmsize], %4
    mova [%1+3*mmsize], %5
%endif
%endmacro
; Copies the 32 bytes of the row above the block into each of the block's
; 16 rows.
;   r0 = src (stepped up one row first), r1 = stride in bytes,
;   r2 = remaining iterations
%macro PRED16x16_VERTICAL 1
cglobal pred16x16_vertical_10_%1, 2,3
    sub     r0, r1                  ; r0 -> row above the block
    mov     r2, 8                   ; 8 iterations x 2 rows = 16 rows
    mova    m0, [r0+ 0]
    mova    m1, [r0+mmsize]
%if mmsize==8
    ; 8-byte registers: the 32-byte row needs two more of them
    mova    m2, [r0+16]
    mova    m3, [r0+24]
%endif
.next_row_pair:
    MOV16   r0+r1*1, m0, m1, m2, m3
    MOV16   r0+r1*2, m0, m1, m2, m3
    lea     r0, [r0+r1*2]           ; advance two rows
    dec     r2
    jg .next_row_pair
    REP_RET
%endmacro
INIT_MMX
PRED16x16_VERTICAL mmxext
INIT_XMM
PRED16x16_VERTICAL sse2
;-----------------------------------------------------------------------------
; void pred16x16_horizontal(pixel *src, int stride)
;-----------------------------------------------------------------------------
; Fills every row of the 16x16 block with the 16-bit pixel immediately to the
; left of that row.
;   r0 = src, r1 = stride in bytes, r2 = remaining iterations
; Two rows are written per iteration, so exactly 8 iterations are needed.
%macro PRED16x16_HORIZONTAL 1
cglobal pred16x16_horizontal_10_%1, 2,3
    mov     r2, 8
.vloop:
    movd    m0, [r0+r1*0-4]         ; the two pixels left of the current row
    movd    m1, [r0+r1*1-4]         ; the two pixels left of the next row
    SPLATW  m0, m0, 1               ; word 1 is the pixel right next to the block
    SPLATW  m1, m1, 1
    MOV16   r0+r1*0, m0, m0, m0, m0
    MOV16   r0+r1*1, m1, m1, m1, m1
    lea     r0, [r0+r1*2]
    dec     r2
    ; Must be jg, not jge: jge would loop once more when the counter reaches
    ; zero (9 iterations) and write two rows past the end of the block.
    jg .vloop
    REP_RET
%endmacro
INIT_MMX
PRED16x16_HORIZONTAL mmxext
INIT_XMM
PRED16x16_HORIZONTAL sse2

View File

@ -43,9 +43,41 @@ PRED4x4(horizontal_down, 10, avx)
/* Declares the prototype ff_pred8x8_<TYPE>_<DEPTH>_<OPT>(uint8_t *src, int stride)
 * for one assembly 8x8 predictor; one invocation per implemented variant. */
#define PRED8x8(TYPE, DEPTH, OPT) \
void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int stride);
PRED8x8(dc, 10, mmxext)
PRED8x8(dc, 10, sse2)
PRED8x8(top_dc, 10, mmxext)
PRED8x8(top_dc, 10, sse2)
PRED8x8(vertical, 10, sse2)
PRED8x8(horizontal, 10, sse2)
/* Declares the prototype
 * ff_pred8x8l_<TYPE>_<DEPTH>_<OPT>(uint8_t *src, int has_topleft, int has_topright, int stride)
 * for one assembly 8x8 luma predictor; one invocation per implemented variant. */
#define PRED8x8L(TYPE, DEPTH, OPT)\
void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int has_topleft, int has_topright, int stride);
PRED8x8L(dc, 10, sse2)
PRED8x8L(dc, 10, ssse3)
PRED8x8L(top_dc, 10, sse2)
PRED8x8L(top_dc, 10, ssse3)
PRED8x8L(vertical, 10, sse2)
PRED8x8L(vertical, 10, ssse3)
PRED8x8L(horizontal, 10, sse2)
PRED8x8L(horizontal, 10, ssse3)
PRED8x8L(down_left, 10, sse2)
PRED8x8L(down_left, 10, ssse3)
PRED8x8L(down_right, 10, sse2)
PRED8x8L(down_right, 10, ssse3)
PRED8x8L(vertical_right, 10, sse2)
PRED8x8L(vertical_right, 10, ssse3)
PRED8x8L(horizontal_up, 10, sse2)
PRED8x8L(horizontal_up, 10, ssse3)
/* Declares the prototype ff_pred16x16_<TYPE>_<DEPTH>_<OPT>(uint8_t *src, int stride)
 * for one assembly 16x16 predictor; one invocation per implemented variant. */
#define PRED16x16(TYPE, DEPTH, OPT)\
void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int stride);
PRED16x16(vertical, 10, mmxext)
PRED16x16(vertical, 10, sse2)
PRED16x16(horizontal, 10, mmxext)
PRED16x16(horizontal, 10, sse2)
void ff_pred16x16_vertical_mmx (uint8_t *src, int stride);
void ff_pred16x16_vertical_sse (uint8_t *src, int stride);
void ff_pred16x16_horizontal_mmx (uint8_t *src, int stride);
@ -253,6 +285,12 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
if (mm_flags & AV_CPU_FLAG_MMX2) {
h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext;
h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext;
h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_mmxext;
h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_mmxext;
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_mmxext;
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_mmxext;
}
if (mm_flags & AV_CPU_FLAG_SSE2) {
h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
@ -261,13 +299,33 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_sse2;
h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_sse2;
h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_sse2;
h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_sse2;
h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2;
h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2;
h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_sse2;
h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_sse2;
h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_10_sse2;
h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_10_sse2;
h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_sse2;
h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_sse2;
h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_sse2;
h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_sse2;
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_sse2;
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_sse2;
}
if (mm_flags & AV_CPU_FLAG_SSSE3) {
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3;
h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3;
h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3;
h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_ssse3;
h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_ssse3;
h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_10_ssse3;
h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_10_ssse3;
h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_ssse3;
}
#if HAVE_AVX
if (mm_flags & AV_CPU_FLAG_AVX) {

View File

@ -528,6 +528,14 @@
%endif
%endmacro
; SPLATD dst, src [, index]
; Broadcasts dword number `index` (default 0) of src into every dword of dst.
;   16-byte registers: index 0..3. pshufd has four 2-bit dword selectors, and
;       multiplying by 0x55 copies the index into all of them.
;   8-byte registers:  index 0..1. pshufw selects words, so dword d is the
;       word pair (2d, 2d+1) repeated twice:
;       selector = 2d*0x11 + (2d+1)*0x44, i.e. 0x44 for d=0 and 0xEE for d=1.
;       The previous form d*0x11 + (d+1)*0x44 was only right for d=0; for d=1
;       it produced 0x99, which selects words 1,2,1,2.
%macro SPLATD 2-3 0
%if mmsize == 16
    pshufd %1, %2, (%3)*0x55
%else
    pshufw %1, %2, (%3)*0x22 + ((%3)*2+1)*0x44
%endif
%endmacro
%macro CLIPW 3 ;(dst, min, max)
pmaxsw %1, %2
pminsw %1, %3

View File

@ -260,6 +260,7 @@ av_cold int ff_alsa_open(AVFormatContext *ctx, snd_pcm_stream_t mode,
}
snd_pcm_hw_params_get_buffer_size_max(hw_params, &buffer_size);
buffer_size = FFMIN(buffer_size, ALSA_BUFFER_SIZE_MAX);
/* TODO: maybe use ctx->max_picture_buffer somehow */
res = snd_pcm_hw_params_set_buffer_size_near(h, hw_params, &buffer_size);
if (res < 0) {
@ -269,6 +270,8 @@ av_cold int ff_alsa_open(AVFormatContext *ctx, snd_pcm_stream_t mode,
}
snd_pcm_hw_params_get_period_size_min(hw_params, &period_size, NULL);
if (!period_size)
period_size = buffer_size / 4;
res = snd_pcm_hw_params_set_period_size_near(h, hw_params, &period_size, NULL);
if (res < 0) {
av_log(ctx, AV_LOG_ERROR, "cannot set ALSA period size (%s)\n",

View File

@ -43,6 +43,8 @@
typedef void (*ff_reorder_func)(const void *, void *, int);
/* Upper limit, in frames, applied to the buffer size requested from ALSA. */
#define ALSA_BUFFER_SIZE_MAX 32768
typedef struct {
AVClass *class;
snd_pcm_t *h;

View File

@ -264,7 +264,7 @@ static int gxf_header(AVFormatContext *s, AVFormatParameters *ap) {
int map_len;
int len;
AVRational main_timebase = {0, 0};
struct gxf_stream_info si;
struct gxf_stream_info *si = s->priv_data;
int i;
if (!parse_packet_header(pb, &pkt_type, &map_len) || pkt_type != PKT_MAP) {
av_log(s, AV_LOG_ERROR, "map packet not found\n");
@ -282,7 +282,7 @@ static int gxf_header(AVFormatContext *s, AVFormatParameters *ap) {
return 0;
}
map_len -= len;
gxf_material_tags(pb, &len, &si);
gxf_material_tags(pb, &len, si);
avio_skip(pb, len);
map_len -= 2;
len = avio_rb16(pb); // length of track description
@ -300,7 +300,7 @@ static int gxf_header(AVFormatContext *s, AVFormatParameters *ap) {
track_id = avio_r8(pb);
track_len = avio_rb16(pb);
len -= track_len;
gxf_track_tags(pb, &track_len, &si);
gxf_track_tags(pb, &track_len, si);
avio_skip(pb, track_len);
if (!(track_type & 0x80)) {
av_log(s, AV_LOG_ERROR, "invalid track type %x\n", track_type);
@ -316,12 +316,12 @@ static int gxf_header(AVFormatContext *s, AVFormatParameters *ap) {
if (idx < 0) continue;
st = s->streams[idx];
if (!main_timebase.num || !main_timebase.den) {
main_timebase.num = si.frames_per_second.den;
main_timebase.den = si.frames_per_second.num * 2;
main_timebase.num = si->frames_per_second.den;
main_timebase.den = si->frames_per_second.num * 2;
}
st->start_time = si.first_field;
if (si.first_field != AV_NOPTS_VALUE && si.last_field != AV_NOPTS_VALUE)
st->duration = si.last_field - si.first_field;
st->start_time = si->first_field;
if (si->first_field != AV_NOPTS_VALUE && si->last_field != AV_NOPTS_VALUE)
st->duration = si->last_field - si->first_field;
}
if (len < 0)
av_log(s, AV_LOG_ERROR, "invalid track description length specified\n");
@ -422,7 +422,9 @@ static int gxf_packet(AVFormatContext *s, AVPacket *pkt) {
AVIOContext *pb = s->pb;
GXFPktType pkt_type;
int pkt_len;
while (!url_feof(pb)) {
struct gxf_stream_info *si = s->priv_data;
while (!pb->eof_reached) {
AVStream *st;
int track_type, track_id, ret;
int field_nr, field_info, skip = 0;
@ -473,6 +475,11 @@ static int gxf_packet(AVFormatContext *s, AVPacket *pkt) {
avio_skip(pb, skip);
pkt->stream_index = stream_index;
pkt->dts = field_nr;
//set duration manually for DV or else lavf misdetects the frame rate
if (st->codec->codec_id == CODEC_ID_DVVIDEO)
pkt->duration = si->fields_per_frame;
return ret;
}
return AVERROR(EIO);
@ -518,7 +525,7 @@ static int64_t gxf_read_timestamp(AVFormatContext *s, int stream_index,
AVInputFormat ff_gxf_demuxer = {
"gxf",
NULL_IF_CONFIG_SMALL("GXF format"),
0,
sizeof(struct gxf_stream_info),
gxf_probe,
gxf_header,
gxf_packet,

View File

@ -111,7 +111,9 @@ static int film_read_header(AVFormatContext *s,
film->audio_samplerate = AV_RB16(&scratch[24]);
film->audio_channels = scratch[21];
film->audio_bits = scratch[22];
if (film->audio_bits == 8)
if (scratch[23] == 2)
film->audio_type = CODEC_ID_ADPCM_ADX;
else if (film->audio_bits == 8)
film->audio_type = CODEC_ID_PCM_S8;
else if (film->audio_bits == 16)
film->audio_type = CODEC_ID_PCM_S16BE;
@ -149,12 +151,19 @@ static int film_read_header(AVFormatContext *s,
st->codec->codec_id = film->audio_type;
st->codec->codec_tag = 1;
st->codec->channels = film->audio_channels;
st->codec->bits_per_coded_sample = film->audio_bits;
st->codec->sample_rate = film->audio_samplerate;
if (film->audio_type == CODEC_ID_ADPCM_ADX) {
st->codec->bits_per_coded_sample = 18 * 8 / 32;
st->codec->block_align = st->codec->channels * 18;
} else {
st->codec->bits_per_coded_sample = film->audio_bits;
st->codec->block_align = st->codec->channels *
st->codec->bits_per_coded_sample / 8;
}
st->codec->bit_rate = st->codec->channels * st->codec->sample_rate *
st->codec->bits_per_coded_sample;
st->codec->block_align = st->codec->channels *
st->codec->bits_per_coded_sample / 8;
}
/* load the sample table */
@ -187,8 +196,12 @@ static int film_read_header(AVFormatContext *s,
film->sample_table[i].pts *= film->base_clock;
film->sample_table[i].pts /= film->audio_samplerate;
audio_frame_counter += (film->sample_table[i].sample_size /
(film->audio_channels * film->audio_bits / 8));
if (film->audio_type == CODEC_ID_ADPCM_ADX)
audio_frame_counter += (film->sample_table[i].sample_size * 32 /
(18 * film->audio_channels));
else
audio_frame_counter += (film->sample_table[i].sample_size /
(film->audio_channels * film->audio_bits / 8));
} else {
film->sample_table[i].stream = film->video_stream_index;
film->sample_table[i].pts = AV_RB32(&scratch[8]) & 0x7FFFFFFF;
@ -227,7 +240,8 @@ static int film_read_packet(AVFormatContext *s,
return AVERROR(ENOMEM);
avio_read(pb, pkt->data, sample->sample_size);
} else if ((sample->stream == film->audio_stream_index) &&
(film->audio_channels == 2)) {
(film->audio_channels == 2) &&
(film->audio_type != CODEC_ID_ADPCM_ADX)) {
/* stereo PCM needs to be interleaved */
if (av_new_packet(pkt, sample->sample_size))