Merge remote branch 'qatar/master'

* qatar/master: (23 commits)
  ac3enc: correct the flipped sign in the ac3_fixed encoder
  Eliminate pointless '#if 1' statements without matching '#else'.
  Add AVX FFT implementation.
  Increase alignment of av_malloc() as needed by AVX ASM.
  Update x86inc.asm from x264 to allow AVX emulation using SSE and MMX.
  mjpeg: Detect overreads in mjpeg_decode_scan() and error out.
  documentation: extend documentation for ffmpeg -aspect option
  APIChanges: update commit hashes for recent additions.
  lavc: deprecate FF_*_TYPE macros in favor of AV_PICTURE_TYPE_* enums
  aac: add headers needed for log2f()
  lavc: remove FF_API_MB_Q cruft
  lavc: remove FF_API_RATE_EMU cruft
  lavc: remove FF_API_HURRY_UP cruft
  pad: make the filter parametric
  vsrc_movie: add key_frame and pict_type.
  vsrc_movie: fix leak in request_frame()
  lavfi: add key_frame and pict_type to AVFilterBufferRefVideo.
  vsrc_buffer: add sample_aspect_ratio fields to arguments.
  lavfi: add fieldorder filter
  scale: make the filter parametric
  ...

Conflicts:
	Changelog
	doc/filters.texi
	ffmpeg.c
	libavcodec/ac3dec.h
	libavcodec/dsputil.c
	libavfilter/avfilter.h
	libavfilter/vf_scale.c
	libavfilter/vf_yadif.c
	libavfilter/vsrc_buffer.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2011-04-27 03:51:04 +02:00
commit d7e5aebae7
64 changed files with 807 additions and 424 deletions

View File

@ -10,7 +10,7 @@ version <next>:
- libxvid aspect pickiness fixed - libxvid aspect pickiness fixed
- Frame multithreaded decoding - Frame multithreaded decoding
- Lots of deprecated API cruft removed - Lots of deprecated API cruft removed
- fft and imdct optimizations for AVX (Sandy Bridge) processors
version 0.7_beta1: version 0.7_beta1:

View File

@ -13,6 +13,17 @@ libavutil: 2011-04-18
API changes, most recent first: API changes, most recent first:
2011-04-XX - bebe72f - lavu 51.1.0 - avutil.h
Add AVPictureType enum and av_get_picture_type_char(), deprecate
FF_*_TYPE defines and av_get_pict_type_char() defined in
libavcodec/avcodec.h.
2011-04-xx - 10d3940 - lavfi 2.3.0 - avfilter.h
Add pict_type and key_frame fields to AVFilterBufferRefVideo.
2011-04-xx - 7a11c82 - lavfi 2.2.0 - vsrc_buffer
Add sample_aspect_ratio fields to vsrc_buffer arguments
2011-04-21 - 94f7451 - lavc 53.1.0 - avcodec.h 2011-04-21 - 94f7451 - lavc 53.1.0 - avcodec.h
Add CODEC_CAP_SLICE_THREADS for codecs supporting sliced threading. Add CODEC_CAP_SLICE_THREADS for codecs supporting sliced threading.

View File

@ -2908,6 +2908,10 @@ static void opt_frame_aspect_ratio(const char *arg)
ffmpeg_exit(1); ffmpeg_exit(1);
} }
frame_aspect_ratio = ar; frame_aspect_ratio = ar;
x = vfilters ? strlen(vfilters) : 0;
vfilters = av_realloc(vfilters, x+100);
snprintf(vfilters+x, x+100, "%csetdar=%f\n", x?',':' ', ar);
} }
static int opt_metadata(const char *opt, const char *arg) static int opt_metadata(const char *opt, const char *arg)

View File

@ -2185,10 +2185,8 @@ static int open_input_stream(HTTPContext *c, const char *info)
} }
} }
#if 1
if (c->fmt_in->iformat->read_seek) if (c->fmt_in->iformat->read_seek)
av_seek_frame(c->fmt_in, -1, stream_pos, 0); av_seek_frame(c->fmt_in, -1, stream_pos, 0);
#endif
/* set the start time (needed for maxtime and RTP packet timing) */ /* set the start time (needed for maxtime and RTP packet timing) */
c->start_time = cur_time; c->start_time = cur_time;
c->first_pts = AV_NOPTS_VALUE; c->first_pts = AV_NOPTS_VALUE;

View File

@ -223,9 +223,9 @@ typedef struct {
float sf[120]; ///< scalefactors float sf[120]; ///< scalefactors
int sf_idx[128]; ///< scalefactor indices (used by encoder) int sf_idx[128]; ///< scalefactor indices (used by encoder)
uint8_t zeroes[128]; ///< band is not coded (used by encoder) uint8_t zeroes[128]; ///< band is not coded (used by encoder)
DECLARE_ALIGNED(16, float, coeffs)[1024]; ///< coefficients for IMDCT DECLARE_ALIGNED(32, float, coeffs)[1024]; ///< coefficients for IMDCT
DECLARE_ALIGNED(16, float, saved)[1024]; ///< overlap DECLARE_ALIGNED(32, float, saved)[1024]; ///< overlap
DECLARE_ALIGNED(16, float, ret)[2048]; ///< PCM output DECLARE_ALIGNED(32, float, ret)[2048]; ///< PCM output
DECLARE_ALIGNED(16, int16_t, ltp_state)[3072]; ///< time signal for LTP DECLARE_ALIGNED(16, int16_t, ltp_state)[3072]; ///< time signal for LTP
PredictorState predictor_state[MAX_PREDICTORS]; PredictorState predictor_state[MAX_PREDICTORS];
} SingleChannelElement; } SingleChannelElement;
@ -272,7 +272,7 @@ typedef struct {
* @defgroup temporary aligned temporary buffers (We do not want to have these on the stack.) * @defgroup temporary aligned temporary buffers (We do not want to have these on the stack.)
* @{ * @{
*/ */
DECLARE_ALIGNED(16, float, buf_mdct)[1024]; DECLARE_ALIGNED(32, float, buf_mdct)[1024];
/** @} */ /** @} */
/** /**
@ -296,7 +296,7 @@ typedef struct {
int sf_offset; ///< offset into pow2sf_tab as appropriate for dsp.float_to_int16 int sf_offset; ///< offset into pow2sf_tab as appropriate for dsp.float_to_int16
/** @} */ /** @} */
DECLARE_ALIGNED(16, float, temp)[128]; DECLARE_ALIGNED(32, float, temp)[128];
enum OCStatus output_configured; enum OCStatus output_configured;
} AACContext; } AACContext;

View File

@ -37,6 +37,7 @@
#include "aac.h" #include "aac.h"
#include "aacenc.h" #include "aacenc.h"
#include "aactab.h" #include "aactab.h"
#include "libavutil/libm.h"
/** bits needed to code codebook run value for long windows */ /** bits needed to code codebook run value for long windows */
static const uint8_t run_value_bits_long[64] = { static const uint8_t run_value_bits_long[64] = {

View File

@ -64,7 +64,7 @@ typedef struct AACEncContext {
int last_frame; int last_frame;
float lambda; float lambda;
DECLARE_ALIGNED(16, int, qcoefs)[96]; ///< quantized coefficients DECLARE_ALIGNED(16, int, qcoefs)[96]; ///< quantized coefficients
DECLARE_ALIGNED(16, float, scoefs)[1024]; ///< scaled coefficients DECLARE_ALIGNED(32, float, scoefs)[1024]; ///< scaled coefficients
} AACEncContext; } AACEncContext;
#endif /* AVCODEC_AACENC_H */ #endif /* AVCODEC_AACENC_H */

View File

@ -32,6 +32,7 @@
#include "aacsbrdata.h" #include "aacsbrdata.h"
#include "fft.h" #include "fft.h"
#include "aacps.h" #include "aacps.h"
#include "libavutil/libm.h"
#include <stdint.h> #include <stdint.h>
#include <float.h> #include <float.h>

View File

@ -201,13 +201,13 @@ typedef struct {
///@} ///@}
///@defgroup arrays aligned arrays ///@defgroup arrays aligned arrays
DECLARE_ALIGNED(16, int, fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< fixed-point transform coefficients DECLARE_ALIGNED(16, int, fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< fixed-point transform coefficients
DECLARE_ALIGNED(16, float, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< transform coefficients DECLARE_ALIGNED(32, float, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< transform coefficients
DECLARE_ALIGNED(16, float, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< delay - added to the next block DECLARE_ALIGNED(32, float, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< delay - added to the next block
DECLARE_ALIGNED(16, float, window)[AC3_BLOCK_SIZE]; ///< window coefficients DECLARE_ALIGNED(32, float, window)[AC3_BLOCK_SIZE]; ///< window coefficients
DECLARE_ALIGNED(16, float, tmp_output)[AC3_BLOCK_SIZE]; ///< temporary storage for output before windowing DECLARE_ALIGNED(32, float, tmp_output)[AC3_BLOCK_SIZE]; ///< temporary storage for output before windowing
DECLARE_ALIGNED(16, float, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< output after imdct transform and windowing DECLARE_ALIGNED(32, float, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< output after imdct transform and windowing
DECLARE_ALIGNED(16, uint8_t, input_buffer)[AC3_FRAME_BUFFER_SIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< temp buffer to prevent overread DECLARE_ALIGNED(32, uint8_t, input_buffer)[AC3_FRAME_BUFFER_SIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< temp buffer to prevent overread
///@} ///@}
} AC3DecodeContext; } AC3DecodeContext;

View File

@ -171,7 +171,7 @@ typedef struct AC3EncodeContext {
uint8_t exp_strategy[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< exponent strategies uint8_t exp_strategy[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< exponent strategies
DECLARE_ALIGNED(16, SampleType, windowed_samples)[AC3_WINDOW_SIZE]; DECLARE_ALIGNED(32, SampleType, windowed_samples)[AC3_WINDOW_SIZE];
} AC3EncodeContext; } AC3EncodeContext;
typedef struct AC3Mant { typedef struct AC3Mant {

View File

@ -47,7 +47,7 @@ static av_cold void mdct_end(AC3MDCTContext *mdct)
static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct, static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct,
int nbits) int nbits)
{ {
int ret = ff_mdct_init(&mdct->fft, nbits, 0, 1.0); int ret = ff_mdct_init(&mdct->fft, nbits, 0, -1.0);
mdct->window = ff_ac3_window; mdct->window = ff_ac3_window;
return ret; return ret;
} }

View File

@ -60,11 +60,11 @@ typedef struct {
int log2_block_count[AT1_QMF_BANDS]; ///< log2 number of blocks in a band int log2_block_count[AT1_QMF_BANDS]; ///< log2 number of blocks in a band
int num_bfus; ///< number of Block Floating Units int num_bfus; ///< number of Block Floating Units
float* spectrum[2]; float* spectrum[2];
DECLARE_ALIGNED(16, float, spec1)[AT1_SU_SAMPLES]; ///< mdct buffer DECLARE_ALIGNED(32, float, spec1)[AT1_SU_SAMPLES]; ///< mdct buffer
DECLARE_ALIGNED(16, float, spec2)[AT1_SU_SAMPLES]; ///< mdct buffer DECLARE_ALIGNED(32, float, spec2)[AT1_SU_SAMPLES]; ///< mdct buffer
DECLARE_ALIGNED(16, float, fst_qmf_delay)[46]; ///< delay line for the 1st stacked QMF filter DECLARE_ALIGNED(32, float, fst_qmf_delay)[46]; ///< delay line for the 1st stacked QMF filter
DECLARE_ALIGNED(16, float, snd_qmf_delay)[46]; ///< delay line for the 2nd stacked QMF filter DECLARE_ALIGNED(32, float, snd_qmf_delay)[46]; ///< delay line for the 2nd stacked QMF filter
DECLARE_ALIGNED(16, float, last_qmf_delay)[256+23]; ///< delay line for the last stacked QMF filter DECLARE_ALIGNED(32, float, last_qmf_delay)[256+23]; ///< delay line for the last stacked QMF filter
} AT1SUCtx; } AT1SUCtx;
/** /**
@ -72,13 +72,13 @@ typedef struct {
*/ */
typedef struct { typedef struct {
AT1SUCtx SUs[AT1_MAX_CHANNELS]; ///< channel sound unit AT1SUCtx SUs[AT1_MAX_CHANNELS]; ///< channel sound unit
DECLARE_ALIGNED(16, float, spec)[AT1_SU_SAMPLES]; ///< the mdct spectrum buffer DECLARE_ALIGNED(32, float, spec)[AT1_SU_SAMPLES]; ///< the mdct spectrum buffer
DECLARE_ALIGNED(16, float, low)[256]; DECLARE_ALIGNED(32, float, low)[256];
DECLARE_ALIGNED(16, float, mid)[256]; DECLARE_ALIGNED(32, float, mid)[256];
DECLARE_ALIGNED(16, float, high)[512]; DECLARE_ALIGNED(32, float, high)[512];
float* bands[3]; float* bands[3];
DECLARE_ALIGNED(16, float, out_samples)[AT1_MAX_CHANNELS][AT1_SU_SAMPLES]; DECLARE_ALIGNED(32, float, out_samples)[AT1_MAX_CHANNELS][AT1_SU_SAMPLES];
FFTContext mdct_ctx[3]; FFTContext mdct_ctx[3];
int channels; int channels;
DSPContext dsp; DSPContext dsp;

View File

@ -74,8 +74,8 @@ typedef struct {
int gcBlkSwitch; int gcBlkSwitch;
gain_block gainBlock[2]; gain_block gainBlock[2];
DECLARE_ALIGNED(16, float, spectrum)[1024]; DECLARE_ALIGNED(32, float, spectrum)[1024];
DECLARE_ALIGNED(16, float, IMDCT_buf)[1024]; DECLARE_ALIGNED(32, float, IMDCT_buf)[1024];
float delayBuf1[46]; ///<qmf delay buffers float delayBuf1[46]; ///<qmf delay buffers
float delayBuf2[46]; float delayBuf2[46];
@ -122,7 +122,7 @@ typedef struct {
FFTContext mdct_ctx; FFTContext mdct_ctx;
} ATRAC3Context; } ATRAC3Context;
static DECLARE_ALIGNED(16, float,mdct_window)[512]; static DECLARE_ALIGNED(32, float, mdct_window)[512];
static VLC spectral_coeff_tab[7]; static VLC spectral_coeff_tab[7];
static float gain_tab1[16]; static float gain_tab1[16];
static float gain_tab2[31]; static float gain_tab2[31];

View File

@ -766,7 +766,7 @@ typedef struct AVPanScan{
* - encoding: Set by libavcodec. for coded_picture (and set by user for input).\ * - encoding: Set by libavcodec. for coded_picture (and set by user for input).\
* - decoding: Set by libavcodec.\ * - decoding: Set by libavcodec.\
*/\ */\
int pict_type;\ enum AVPictureType pict_type;\
\ \
/**\ /**\
* presentation timestamp in time_base units (time when frame should be shown to user)\ * presentation timestamp in time_base units (time when frame should be shown to user)\
@ -1016,14 +1016,16 @@ typedef struct AVPanScan{
#define FF_BUFFER_TYPE_SHARED 4 ///< Buffer from somewhere else; don't deallocate image (data/base), all other tables are not shared. #define FF_BUFFER_TYPE_SHARED 4 ///< Buffer from somewhere else; don't deallocate image (data/base), all other tables are not shared.
#define FF_BUFFER_TYPE_COPY 8 ///< Just a (modified) copy of some other buffer, don't deallocate anything. #define FF_BUFFER_TYPE_COPY 8 ///< Just a (modified) copy of some other buffer, don't deallocate anything.
#if FF_API_OLD_FF_PICT_TYPES
#define FF_I_TYPE 1 ///< Intra /* DEPRECATED, directly use the AV_PICTURE_TYPE_* enum values */
#define FF_P_TYPE 2 ///< Predicted #define FF_I_TYPE AV_PICTURE_TYPE_I ///< Intra
#define FF_B_TYPE 3 ///< Bi-dir predicted #define FF_P_TYPE AV_PICTURE_TYPE_P ///< Predicted
#define FF_S_TYPE 4 ///< S(GMC)-VOP MPEG4 #define FF_B_TYPE AV_PICTURE_TYPE_B ///< Bi-dir predicted
#define FF_SI_TYPE 5 ///< Switching Intra #define FF_S_TYPE AV_PICTURE_TYPE_S ///< S(GMC)-VOP MPEG4
#define FF_SP_TYPE 6 ///< Switching Predicted #define FF_SI_TYPE AV_PICTURE_TYPE_SI ///< Switching Intra
#define FF_BI_TYPE 7 #define FF_SP_TYPE AV_PICTURE_TYPE_SP ///< Switching Predicted
#define FF_BI_TYPE AV_PICTURE_TYPE_BI
#endif
#define FF_BUFFER_HINTS_VALID 0x01 // Buffer hints value is meaningful (if 0 ignore). #define FF_BUFFER_HINTS_VALID 0x01 // Buffer hints value is meaningful (if 0 ignore).
#define FF_BUFFER_HINTS_READABLE 0x02 // Codec will read from buffer. #define FF_BUFFER_HINTS_READABLE 0x02 // Codec will read from buffer.
@ -1215,16 +1217,6 @@ typedef struct AVCodecContext {
*/ */
enum PixelFormat pix_fmt; enum PixelFormat pix_fmt;
#if FF_API_RATE_EMU
/**
* Frame rate emulation. If not zero, the lower layer (i.e. format handler)
* has to read frames at native frame rate.
* - encoding: Set by user.
* - decoding: unused
*/
attribute_deprecated int rate_emu;
#endif
/** /**
* If non NULL, 'draw_horiz_band' is called by the libavcodec * If non NULL, 'draw_horiz_band' is called by the libavcodec
* decoder to draw a horizontal band. It improves cache usage. Not * decoder to draw a horizontal band. It improves cache usage. Not
@ -1326,16 +1318,6 @@ typedef struct AVCodecContext {
int b_frame_strategy; int b_frame_strategy;
#if FF_API_HURRY_UP
/**
* hurry up amount
* - encoding: unused
* - decoding: Set by user. 1-> Skip B-frames, 2-> Skip IDCT/dequant too, 5-> Skip everything except header
* @deprecated Deprecated in favor of skip_idct and skip_frame.
*/
attribute_deprecated int hurry_up;
#endif
struct AVCodec *codec; struct AVCodec *codec;
void *priv_data; void *priv_data;
@ -1800,22 +1782,6 @@ typedef struct AVCodecContext {
*/ */
uint64_t error[4]; uint64_t error[4];
#if FF_API_MB_Q
/**
* minimum MB quantizer
* - encoding: unused
* - decoding: unused
*/
attribute_deprecated int mb_qmin;
/**
* maximum MB quantizer
* - encoding: unused
* - decoding: unused
*/
attribute_deprecated int mb_qmax;
#endif
/** /**
* motion estimation comparison function * motion estimation comparison function
* - encoding: Set by user. * - encoding: Set by user.
@ -3866,13 +3832,17 @@ void avcodec_default_free_buffers(AVCodecContext *s);
/* misc useful functions */ /* misc useful functions */
#if FF_API_OLD_FF_PICT_TYPES
/** /**
* Return a single letter to describe the given picture type pict_type. * Return a single letter to describe the given picture type pict_type.
* *
* @param[in] pict_type the picture type * @param[in] pict_type the picture type
* @return A single character representing the picture type. * @return A single character representing the picture type.
* @deprecated Use av_get_picture_type_char() instead.
*/ */
attribute_deprecated
char av_get_pict_type_char(int pict_type); char av_get_pict_type_char(int pict_type);
#endif
/** /**
* Return codec bits per sample. * Return codec bits per sample.

View File

@ -55,7 +55,7 @@ typedef struct {
int num_bands; int num_bands;
unsigned int *bands; unsigned int *bands;
float root; float root;
DECLARE_ALIGNED(16, FFTSample, coeffs)[BINK_BLOCK_MAX_SIZE]; DECLARE_ALIGNED(32, FFTSample, coeffs)[BINK_BLOCK_MAX_SIZE];
DECLARE_ALIGNED(16, short, previous)[BINK_BLOCK_MAX_SIZE / 16]; ///< coeffs from previous audio block DECLARE_ALIGNED(16, short, previous)[BINK_BLOCK_MAX_SIZE / 16]; ///< coeffs from previous audio block
float *coeffs_ptr[MAX_CHANNELS]; ///< pointers to the coeffs arrays for float_to_int16_interleave float *coeffs_ptr[MAX_CHANNELS]; ///< pointers to the coeffs arrays for float_to_int16_interleave
union { union {

View File

@ -153,7 +153,7 @@ typedef struct cook {
/* data buffers */ /* data buffers */
uint8_t* decoded_bytes_buffer; uint8_t* decoded_bytes_buffer;
DECLARE_ALIGNED(16, float,mono_mdct_output)[2048]; DECLARE_ALIGNED(32, float, mono_mdct_output)[2048];
float decode_buffer_1[1024]; float decode_buffer_1[1024];
float decode_buffer_2[1024]; float decode_buffer_2[1024];
float decode_buffer_0[1060]; /* static allocation for joint decode */ float decode_buffer_0[1060]; /* static allocation for joint decode */

View File

@ -321,16 +321,16 @@ typedef struct {
/* Subband samples history (for ADPCM) */ /* Subband samples history (for ADPCM) */
float subband_samples_hist[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4]; float subband_samples_hist[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4];
DECLARE_ALIGNED(16, float, subband_fir_hist)[DCA_PRIM_CHANNELS_MAX][512]; DECLARE_ALIGNED(32, float, subband_fir_hist)[DCA_PRIM_CHANNELS_MAX][512];
DECLARE_ALIGNED(16, float, subband_fir_noidea)[DCA_PRIM_CHANNELS_MAX][32]; DECLARE_ALIGNED(32, float, subband_fir_noidea)[DCA_PRIM_CHANNELS_MAX][32];
int hist_index[DCA_PRIM_CHANNELS_MAX]; int hist_index[DCA_PRIM_CHANNELS_MAX];
DECLARE_ALIGNED(16, float, raXin)[32]; DECLARE_ALIGNED(32, float, raXin)[32];
int output; ///< type of output int output; ///< type of output
float scale_bias; ///< output scale float scale_bias; ///< output scale
DECLARE_ALIGNED(16, float, subband_samples)[DCA_BLOCKS_MAX][DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][8]; DECLARE_ALIGNED(32, float, subband_samples)[DCA_BLOCKS_MAX][DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][8];
DECLARE_ALIGNED(16, float, samples)[(DCA_PRIM_CHANNELS_MAX+1)*256]; DECLARE_ALIGNED(32, float, samples)[(DCA_PRIM_CHANNELS_MAX+1)*256];
const float *samples_chanptr[DCA_PRIM_CHANNELS_MAX+1]; const float *samples_chanptr[DCA_PRIM_CHANNELS_MAX+1];
uint8_t dca_buffer[DCA_MAX_FRAME_SIZE + DCA_MAX_EXSS_HEADER_SIZE + DCA_BUFFER_PADDING_SIZE]; uint8_t dca_buffer[DCA_MAX_FRAME_SIZE + DCA_MAX_EXSS_HEADER_SIZE + DCA_BUFFER_PADDING_SIZE];

View File

@ -312,18 +312,16 @@ static void dct_error(const char *name, int is_idct,
} }
for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i])); for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
#if 1 // dump systematic errors
for(i=0; i<64; i++){ for(i=0; i<64; i++){
if(i%8==0) printf("\n"); if(i%8==0) printf("\n");
printf("%7d ", (int)sysErr[i]); printf("%7d ", (int)sysErr[i]);
} }
printf("\n"); printf("\n");
#endif
printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n", printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
is_idct ? "IDCT" : "DCT", is_idct ? "IDCT" : "DCT",
name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax); name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
#if 1 //Speed test
/* speed test */ /* speed test */
for(i=0;i<64;i++) for(i=0;i<64;i++)
block1[i] = 0; block1[i] = 0;
@ -376,7 +374,6 @@ static void dct_error(const char *name, int is_idct,
printf("%s %s: %0.1f kdct/s\n", printf("%s %s: %0.1f kdct/s\n",
is_idct ? "IDCT" : "DCT", is_idct ? "IDCT" : "DCT",
name, (double)it1 * 1000.0 / (double)ti1); name, (double)it1 * 1000.0 / (double)ti1);
#endif
} }
DECLARE_ALIGNED(8, static uint8_t, img_dest)[64]; DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];

View File

@ -786,7 +786,6 @@ void ff_er_frame_end(MpegEncContext *s){
} }
} }
#if 1
/* handle overlapping slices */ /* handle overlapping slices */
for(error_type=1; error_type<=3; error_type++){ for(error_type=1; error_type<=3; error_type++){
int end_ok=0; int end_ok=0;
@ -807,8 +806,7 @@ void ff_er_frame_end(MpegEncContext *s){
end_ok=0; end_ok=0;
} }
} }
#endif
#if 1
/* handle slices with partitions of different length */ /* handle slices with partitions of different length */
if(s->partitioned_frame){ if(s->partitioned_frame){
int end_ok=0; int end_ok=0;
@ -829,7 +827,7 @@ void ff_er_frame_end(MpegEncContext *s){
end_ok=0; end_ok=0;
} }
} }
#endif
/* handle missing slices */ /* handle missing slices */
if(s->error_recognition>=4){ if(s->error_recognition>=4){
int end_ok=1; int end_ok=1;
@ -853,7 +851,6 @@ void ff_er_frame_end(MpegEncContext *s){
} }
} }
#if 1
/* backward mark errors */ /* backward mark errors */
distance=9999999; distance=9999999;
for(error_type=1; error_type<=3; error_type++){ for(error_type=1; error_type<=3; error_type++){
@ -878,7 +875,6 @@ void ff_er_frame_end(MpegEncContext *s){
distance= 9999999; distance= 9999999;
} }
} }
#endif
/* forward mark errors */ /* forward mark errors */
error=0; error=0;
@ -893,7 +889,7 @@ void ff_er_frame_end(MpegEncContext *s){
s->error_status_table[mb_xy]|= error; s->error_status_table[mb_xy]|= error;
} }
} }
#if 1
/* handle not partitioned case */ /* handle not partitioned case */
if(!s->partitioned_frame){ if(!s->partitioned_frame){
for(i=0; i<s->mb_num; i++){ for(i=0; i<s->mb_num; i++){
@ -904,7 +900,6 @@ void ff_er_frame_end(MpegEncContext *s){
s->error_status_table[mb_xy]= error; s->error_status_table[mb_xy]= error;
} }
} }
#endif
dc_error= ac_error= mv_error=0; dc_error= ac_error= mv_error=0;
for(i=0; i<s->mb_num; i++){ for(i=0; i<s->mb_num; i++){
@ -1065,16 +1060,15 @@ void ff_er_frame_end(MpegEncContext *s){
s->dc_val[2][mb_x + mb_y*s->mb_stride]= (dcv+4)>>3; s->dc_val[2][mb_x + mb_y*s->mb_stride]= (dcv+4)>>3;
} }
} }
#if 1
/* guess DC for damaged blocks */ /* guess DC for damaged blocks */
guess_dc(s, s->dc_val[0], s->mb_width*2, s->mb_height*2, s->b8_stride, 1); guess_dc(s, s->dc_val[0], s->mb_width*2, s->mb_height*2, s->b8_stride, 1);
guess_dc(s, s->dc_val[1], s->mb_width , s->mb_height , s->mb_stride, 0); guess_dc(s, s->dc_val[1], s->mb_width , s->mb_height , s->mb_stride, 0);
guess_dc(s, s->dc_val[2], s->mb_width , s->mb_height , s->mb_stride, 0); guess_dc(s, s->dc_val[2], s->mb_width , s->mb_height , s->mb_stride, 0);
#endif
/* filter luma DC */ /* filter luma DC */
filter181(s->dc_val[0], s->mb_width*2, s->mb_height*2, s->b8_stride); filter181(s->dc_val[0], s->mb_width*2, s->mb_height*2, s->b8_stride);
#if 1
/* render DC only intra */ /* render DC only intra */
for(mb_y=0; mb_y<s->mb_height; mb_y++){ for(mb_y=0; mb_y<s->mb_height; mb_y++){
for(mb_x=0; mb_x<s->mb_width; mb_x++){ for(mb_x=0; mb_x<s->mb_width; mb_x++){
@ -1094,7 +1088,6 @@ void ff_er_frame_end(MpegEncContext *s){
put_dc(s, dest_y, dest_cb, dest_cr, mb_x, mb_y); put_dc(s, dest_y, dest_cb, dest_cr, mb_x, mb_y);
} }
} }
#endif
if(s->avctx->error_concealment&FF_EC_DEBLOCK){ if(s->avctx->error_concealment&FF_EC_DEBLOCK){
/* filter horizontal block boundaries */ /* filter horizontal block boundaries */

View File

@ -93,6 +93,44 @@ av_cold void ff_init_ff_cos_tabs(int index)
#endif #endif
} }
/*
 * Offsets (a permutation of 0..15) added by fft_perm_avx() to the base index
 * of every 16-entry group that lies in the second half of an FFT32.
 */
static const int avx_tab[] = {
     0,  4,  1,  5,
     8, 12,  9, 13,
     2,  6,  3,  7,
    10, 14, 11, 15,
};
/**
 * Tell whether index i falls into the upper 16 entries of the 32-point (or
 * smaller) sub-transform it ends up in.
 *
 * A transform of size n > 32 is narrowed step by step: indices below n/2 stay
 * in a sub-transform of size n/2, indices in [n/2, 3n/4) and [3n/4, n) are
 * rebased to 0 and continue in a sub-transform of size n/4.
 *
 * @param i index inside the transform
 * @param n transform size
 * @return non-zero if the reduced index is >= 16, zero otherwise
 */
static int is_second_half_of_fft32(int i, int n)
{
    while (n > 32) {
        if (i < n / 2) {
            /* first half: one sub-transform of half the size */
            n /= 2;
            continue;
        }
        /* third or fourth quarter: rebase the index onto that quarter */
        i -= (i < 3 * n / 4) ? n / 2 : 3 * n / 4;
        n /= 4;
    }
    return i >= 16;
}
/**
 * Fill s->revtab with the input permutation used when
 * s->fft_permutation == FF_FFT_PERM_AVX.
 *
 * The 1 << s->nbits indices are processed in groups of 16. For every index
 * the revtab slot is -split_radix_permutation(index, n, s->inverse) & (n - 1);
 * the value stored there depends on the group:
 *  - group in the second half of an FFT32: group base + avx_tab[k]
 *  - otherwise: the index with its three low bits rearranged
 *    (bits 2..1 move to bits 1..0, bit 0 moves to bit 2).
 *
 * @param s FFT context; nbits and inverse are read, revtab is written
 */
static av_cold void fft_perm_avx(FFTContext *s)
{
    const int n = 1 << s->nbits;
    int base, k;

    for (base = 0; base < n; base += 16) {
        /* the choice is made once per 16-entry group */
        const int second_half = is_second_half_of_fft32(base, n);

        for (k = 0; k < 16; k++) {
            const int src  = base + k;
            const int slot = -split_radix_permutation(src, n, s->inverse) & (n - 1);
            int dst;

            if (second_half)
                dst = base + avx_tab[k];
            else
                dst = (src & ~7) | ((src >> 1) & 3) | ((src << 2) & 4);

            s->revtab[slot] = dst;
        }
    }
}
av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
{ {
int i, j, n; int i, j, n;
@ -132,11 +170,16 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
for(j=4; j<=nbits; j++) { for(j=4; j<=nbits; j++) {
ff_init_ff_cos_tabs(j); ff_init_ff_cos_tabs(j);
} }
for(i=0; i<n; i++) {
int j = i; if (s->fft_permutation == FF_FFT_PERM_AVX) {
if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) fft_perm_avx(s);
j = (j&~3) | ((j>>1)&1) | ((j<<1)&2); } else {
s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j; for(i=0; i<n; i++) {
int j = i;
if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
}
} }
return 0; return 0;

View File

@ -85,6 +85,7 @@ struct FFTContext {
int fft_permutation; int fft_permutation;
#define FF_FFT_PERM_DEFAULT 0 #define FF_FFT_PERM_DEFAULT 0
#define FF_FFT_PERM_SWAP_LSBS 1 #define FF_FFT_PERM_SWAP_LSBS 1
#define FF_FFT_PERM_AVX 2
int mdct_permutation; int mdct_permutation;
#define FF_MDCT_PERM_NONE 0 #define FF_MDCT_PERM_NONE 0
#define FF_MDCT_PERM_INTERLEAVE 1 #define FF_MDCT_PERM_INTERLEAVE 1
@ -97,7 +98,7 @@ struct FFTContext {
#endif #endif
#define COSTABLE(size) \ #define COSTABLE(size) \
COSTABLE_CONST DECLARE_ALIGNED(16, FFTSample, FFT_NAME(ff_cos_##size))[size/2] COSTABLE_CONST DECLARE_ALIGNED(32, FFTSample, FFT_NAME(ff_cos_##size))[size/2]
extern COSTABLE(16); extern COSTABLE(16);
extern COSTABLE(32); extern COSTABLE(32);

View File

@ -599,10 +599,6 @@ retry:
s->current_picture.pict_type= s->pict_type; s->current_picture.pict_type= s->pict_type;
s->current_picture.key_frame= s->pict_type == FF_I_TYPE; s->current_picture.key_frame= s->pict_type == FF_I_TYPE;
#if FF_API_HURRY_UP
/* skip everything if we are in a hurry>=5 */
if(avctx->hurry_up>=5) return get_consumed_bytes(s, buf_size);
#endif
if( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==FF_B_TYPE) if( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==FF_B_TYPE)
||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=FF_I_TYPE) ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=FF_I_TYPE)
|| avctx->skip_frame >= AVDISCARD_ALL) || avctx->skip_frame >= AVDISCARD_ALL)

View File

@ -612,18 +612,10 @@ retry:
/* skip B-frames if we don't have reference frames */ /* skip B-frames if we don't have reference frames */
if(s->last_picture_ptr==NULL && (s->pict_type==FF_B_TYPE || s->dropable)) return get_consumed_bytes(s, buf_size); if(s->last_picture_ptr==NULL && (s->pict_type==FF_B_TYPE || s->dropable)) return get_consumed_bytes(s, buf_size);
#if FF_API_HURRY_UP
/* skip b frames if we are in a hurry */
if(avctx->hurry_up && s->pict_type==FF_B_TYPE) return get_consumed_bytes(s, buf_size);
#endif
if( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==FF_B_TYPE) if( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==FF_B_TYPE)
|| (avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=FF_I_TYPE) || (avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=FF_I_TYPE)
|| avctx->skip_frame >= AVDISCARD_ALL) || avctx->skip_frame >= AVDISCARD_ALL)
return get_consumed_bytes(s, buf_size); return get_consumed_bytes(s, buf_size);
#if FF_API_HURRY_UP
/* skip everything if we are in a hurry>=5 */
if(avctx->hurry_up>=5) return get_consumed_bytes(s, buf_size);
#endif
if(s->next_p_frame_damaged){ if(s->next_p_frame_damaged){
if(s->pict_type==FF_B_TYPE) if(s->pict_type==FF_B_TYPE)

View File

@ -2966,11 +2966,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
buf_index += consumed; buf_index += consumed;
//FIXME do not discard SEI id //FIXME do not discard SEI id
if( if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)
#if FF_API_HURRY_UP
(s->hurry_up == 1 && h->nal_ref_idc == 0) ||
#endif
(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
continue; continue;
again: again:
@ -3007,9 +3003,6 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
} }
if(hx->redundant_pic_count==0 if(hx->redundant_pic_count==0
#if FF_API_HURRY_UP
&& hx->s.hurry_up < 5
#endif
&& (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
&& (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE) && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
&& (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE) && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
@ -3047,9 +3040,6 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
&& s->context_initialized && s->context_initialized
#if FF_API_HURRY_UP
&& s->hurry_up < 5
#endif
&& (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
&& (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE) && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
&& (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE) && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
@ -3186,11 +3176,7 @@ static int decode_frame(AVCodecContext *avctx,
} }
if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){ if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
if (avctx->skip_frame >= AVDISCARD_NONREF if (avctx->skip_frame >= AVDISCARD_NONREF)
#if FF_API_HURRY_UP
|| s->hurry_up
#endif
)
return 0; return 0;
av_log(avctx, AV_LOG_ERROR, "no frame!\n"); av_log(avctx, AV_LOG_ERROR, "no frame!\n");
return -1; return -1;

View File

@ -1007,7 +1007,6 @@ static void fill_decode_caches(H264Context *h, int mb_type){
} }
} }
#if 1
if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){ if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){
int list; int list;
for(list=0; list<h->list_count; list++){ for(list=0; list<h->list_count; list++){
@ -1182,7 +1181,6 @@ static void fill_decode_caches(H264Context *h, int mb_type){
} }
} }
} }
#endif
h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]); h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
} }

View File

@ -88,7 +88,7 @@ typedef struct {
DSPContext dsp; DSPContext dsp;
FFTContext fft; FFTContext fft;
DECLARE_ALIGNED(16, FFTComplex, samples)[COEFFS/2]; DECLARE_ALIGNED(32, FFTComplex, samples)[COEFFS/2];
float *out_samples; float *out_samples;
} IMCContext; } IMCContext;

View File

@ -158,7 +158,6 @@ static int hpel_motion_search(MpegEncContext * s,
const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
+ (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor; + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
#if 1
int key; int key;
int map_generation= c->map_generation; int map_generation= c->map_generation;
#ifndef NDEBUG #ifndef NDEBUG
@ -172,7 +171,6 @@ static int hpel_motion_search(MpegEncContext * s,
assert(map[(index+1)&(ME_MAP_SIZE-1)] == key); assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation; key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
assert(map[(index-1)&(ME_MAP_SIZE-1)] == key); assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
#endif
if(t<=b){ if(t<=b){
CHECK_HALF_MV(0, 1, mx ,my-1) CHECK_HALF_MV(0, 1, mx ,my-1)
if(l<=r){ if(l<=r){

View File

@ -2476,18 +2476,10 @@ static int decode_chunks(AVCodecContext *avctx,
/* Skip P-frames if we do not have a reference frame or we have an invalid header. */ /* Skip P-frames if we do not have a reference frame or we have an invalid header. */
if(s2->pict_type==FF_P_TYPE && !s->sync) break; if(s2->pict_type==FF_P_TYPE && !s->sync) break;
} }
#if FF_API_HURRY_UP
/* Skip B-frames if we are in a hurry. */
if(avctx->hurry_up && s2->pict_type==FF_B_TYPE) break;
#endif
if( (avctx->skip_frame >= AVDISCARD_NONREF && s2->pict_type==FF_B_TYPE) if( (avctx->skip_frame >= AVDISCARD_NONREF && s2->pict_type==FF_B_TYPE)
||(avctx->skip_frame >= AVDISCARD_NONKEY && s2->pict_type!=FF_I_TYPE) ||(avctx->skip_frame >= AVDISCARD_NONKEY && s2->pict_type!=FF_I_TYPE)
|| avctx->skip_frame >= AVDISCARD_ALL) || avctx->skip_frame >= AVDISCARD_ALL)
break; break;
#if FF_API_HURRY_UP
/* Skip everything if we are in a hurry>=5. */
if(avctx->hurry_up>=5) break;
#endif
if (!s->mpeg_enc_ctx_allocated) break; if (!s->mpeg_enc_ctx_allocated) break;

View File

@ -1131,9 +1131,6 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
} }
} }
#if FF_API_HURRY_UP
s->hurry_up= s->avctx->hurry_up;
#endif
s->error_recognition= avctx->error_recognition; s->error_recognition= avctx->error_recognition;
/* set dequantizer, we can't do it during init as it might change for mpeg4 /* set dequantizer, we can't do it during init as it might change for mpeg4
@ -2125,9 +2122,6 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
} }
/* skip dequant / idct if we are really late ;) */ /* skip dequant / idct if we are really late ;) */
#if FF_API_HURRY_UP
if(s->hurry_up>1) goto skip_idct;
#endif
if(s->avctx->skip_idct){ if(s->avctx->skip_idct){
if( (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == FF_B_TYPE) if( (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == FF_B_TYPE)
||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != FF_I_TYPE) ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != FF_I_TYPE)

View File

@ -391,11 +391,6 @@ typedef struct MpegEncContext {
int no_rounding; /**< apply no rounding to motion compensation (MPEG4, msmpeg4, ...) int no_rounding; /**< apply no rounding to motion compensation (MPEG4, msmpeg4, ...)
for b-frames rounding mode is always 0 */ for b-frames rounding mode is always 0 */
#if FF_API_HURRY_UP
int hurry_up; /**< when set to 1 during decoding, b frames will be skipped
when set to 2 idct/dequant will be skipped too */
#endif
/* macroblock layer */ /* macroblock layer */
int mb_x, mb_y; int mb_x, mb_y;
int mb_skip_run; int mb_skip_run;

View File

@ -985,10 +985,9 @@ void ff_msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n)
if(level<=MAX_LEVEL && run<=MAX_RUN){ if(level<=MAX_LEVEL && run<=MAX_RUN){
s->ac_stats[s->mb_intra][n>3][level][run][last]++; s->ac_stats[s->mb_intra][n>3][level][run][last]++;
} }
#if 0
else s->ac_stats[s->mb_intra][n > 3][40][63][0]++; //esc3 like
s->ac_stats[s->mb_intra][n>3][40][63][0]++; //esc3 like
#endif
code = get_rl_index(rl, last, run, level); code = get_rl_index(rl, last, run, level);
put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
if (code == rl->n) { if (code == rl->n) {

View File

@ -47,7 +47,7 @@
typedef struct NellyMoserDecodeContext { typedef struct NellyMoserDecodeContext {
AVCodecContext* avctx; AVCodecContext* avctx;
DECLARE_ALIGNED(16, float,float_buf)[NELLY_SAMPLES]; DECLARE_ALIGNED(32, float, float_buf)[NELLY_SAMPLES];
float state[128]; float state[128];
AVLFG random_state; AVLFG random_state;
GetBitContext gb; GetBitContext gb;
@ -55,7 +55,7 @@ typedef struct NellyMoserDecodeContext {
DSPContext dsp; DSPContext dsp;
FFTContext imdct_ctx; FFTContext imdct_ctx;
FmtConvertContext fmt_conv; FmtConvertContext fmt_conv;
DECLARE_ALIGNED(16, float,imdct_out)[NELLY_BUF_LEN * 2]; DECLARE_ALIGNED(32, float, imdct_out)[NELLY_BUF_LEN * 2];
} NellyMoserDecodeContext; } NellyMoserDecodeContext;
static void overlap_and_window(NellyMoserDecodeContext *s, float *state, float *audio, float *a_in) static void overlap_and_window(NellyMoserDecodeContext *s, float *state, float *audio, float *a_in)

View File

@ -55,9 +55,9 @@ typedef struct NellyMoserEncodeContext {
int have_saved; int have_saved;
DSPContext dsp; DSPContext dsp;
FFTContext mdct_ctx; FFTContext mdct_ctx;
DECLARE_ALIGNED(16, float, mdct_out)[NELLY_SAMPLES]; DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
DECLARE_ALIGNED(16, float, in_buff)[NELLY_SAMPLES]; DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
DECLARE_ALIGNED(16, float, buf)[2][3 * NELLY_BUF_LEN]; ///< sample buffer DECLARE_ALIGNED(32, float, buf)[2][3 * NELLY_BUF_LEN]; ///< sample buffer
float (*opt )[NELLY_BANDS]; float (*opt )[NELLY_BANDS];
uint8_t (*path)[NELLY_BANDS]; uint8_t (*path)[NELLY_BANDS];
} NellyMoserEncodeContext; } NellyMoserEncodeContext;

View File

@ -105,9 +105,6 @@ static const AVOption options[]={
{"extradata_size", NULL, OFFSET(extradata_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, {"extradata_size", NULL, OFFSET(extradata_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"time_base", NULL, OFFSET(time_base), FF_OPT_TYPE_RATIONAL, DEFAULT, INT_MIN, INT_MAX}, {"time_base", NULL, OFFSET(time_base), FF_OPT_TYPE_RATIONAL, DEFAULT, INT_MIN, INT_MAX},
{"g", "set the group of picture size", OFFSET(gop_size), FF_OPT_TYPE_INT, 12, INT_MIN, INT_MAX, V|E}, {"g", "set the group of picture size", OFFSET(gop_size), FF_OPT_TYPE_INT, 12, INT_MIN, INT_MAX, V|E},
#if FF_API_RATE_EMU
{"rate_emu", "frame rate emulation", OFFSET(rate_emu), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
#endif
{"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, {"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"ac", "set number of audio channels", OFFSET(channels), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, {"ac", "set number of audio channels", OFFSET(channels), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"cutoff", "set cutoff bandwidth", OFFSET(cutoff), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|E}, {"cutoff", "set cutoff bandwidth", OFFSET(cutoff), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|E},
@ -124,9 +121,6 @@ static const AVOption options[]={
{"rc_strategy", "ratecontrol method", OFFSET(rc_strategy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"rc_strategy", "ratecontrol method", OFFSET(rc_strategy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"b_strategy", "strategy to choose between I/P/B-frames", OFFSET(b_frame_strategy), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E}, {"b_strategy", "strategy to choose between I/P/B-frames", OFFSET(b_frame_strategy), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
{"wpredp", "weighted prediction analysis method", OFFSET(weighted_p_pred), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E}, {"wpredp", "weighted prediction analysis method", OFFSET(weighted_p_pred), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
#if FF_API_HURRY_UP
{"hurry_up", "deprecated, use skip_idct/skip_frame instead", OFFSET(hurry_up), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
#endif
{"ps", "rtp payload size in bytes", OFFSET(rtp_payload_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"ps", "rtp payload size in bytes", OFFSET(rtp_payload_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"mv_bits", NULL, OFFSET(mv_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, {"mv_bits", NULL, OFFSET(mv_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"header_bits", NULL, OFFSET(header_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, {"header_bits", NULL, OFFSET(header_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
@ -253,10 +247,6 @@ static const AVOption options[]={
{"pf", "forward predicted MVs of P-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_P_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"}, {"pf", "forward predicted MVs of P-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_P_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"},
{"bf", "forward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"}, {"bf", "forward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"},
{"bb", "backward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_BACK, INT_MIN, INT_MAX, V|D, "debug_mv"}, {"bb", "backward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_BACK, INT_MIN, INT_MAX, V|D, "debug_mv"},
#if FF_API_MB_Q
{"mb_qmin", "obsolete, use qmin", OFFSET(mb_qmin), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"mb_qmax", "obsolete, use qmax", OFFSET(mb_qmax), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
#endif
{"cmp", "full pel me compare function", OFFSET(me_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"}, {"cmp", "full pel me compare function", OFFSET(me_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
{"subcmp", "sub pel me compare function", OFFSET(me_sub_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"}, {"subcmp", "sub pel me compare function", OFFSET(me_sub_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
{"mbcmp", "macroblock compare function", OFFSET(mb_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"}, {"mbcmp", "macroblock compare function", OFFSET(mb_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},

View File

@ -380,9 +380,6 @@ static void update_context_from_user(AVCodecContext *dst, AVCodecContext *src)
dst->release_buffer = src->release_buffer; dst->release_buffer = src->release_buffer;
dst->opaque = src->opaque; dst->opaque = src->opaque;
#if FF_API_HURRY_UP
dst->hurry_up = src->hurry_up;
#endif
dst->dsp_mask = src->dsp_mask; dst->dsp_mask = src->dsp_mask;
dst->debug = src->debug; dst->debug = src->debug;
dst->debug_mv = src->debug_mv; dst->debug_mv = src->debug_mv;

View File

@ -120,7 +120,7 @@ typedef struct {
} FFTCoefficient; } FFTCoefficient;
typedef struct { typedef struct {
DECLARE_ALIGNED(16, QDM2Complex, complex)[MPA_MAX_CHANNELS][256]; DECLARE_ALIGNED(32, QDM2Complex, complex)[MPA_MAX_CHANNELS][256];
} QDM2FFT; } QDM2FFT;
/** /**

View File

@ -1454,19 +1454,10 @@ int ff_rv34_decode_frame(AVCodecContext *avctx,
} }
if((!s->last_picture_ptr || !s->last_picture_ptr->data[0]) && si.type == FF_B_TYPE) if((!s->last_picture_ptr || !s->last_picture_ptr->data[0]) && si.type == FF_B_TYPE)
return -1; return -1;
#if FF_API_HURRY_UP
/* skip b frames if we are in a hurry */
if(avctx->hurry_up && si.type==FF_B_TYPE) return buf_size;
#endif
if( (avctx->skip_frame >= AVDISCARD_NONREF && si.type==FF_B_TYPE) if( (avctx->skip_frame >= AVDISCARD_NONREF && si.type==FF_B_TYPE)
|| (avctx->skip_frame >= AVDISCARD_NONKEY && si.type!=FF_I_TYPE) || (avctx->skip_frame >= AVDISCARD_NONKEY && si.type!=FF_I_TYPE)
|| avctx->skip_frame >= AVDISCARD_ALL) || avctx->skip_frame >= AVDISCARD_ALL)
return buf_size; return buf_size;
#if FF_API_HURRY_UP
/* skip everything if we are in a hurry>=5 */
if(avctx->hurry_up>=5)
return buf_size;
#endif
for(i=0; i<slice_count; i++){ for(i=0; i<slice_count; i++){
int offset= get_slice_offset(avctx, slices_hdr, i); int offset= get_slice_offset(avctx, slices_hdr, i);

View File

@ -897,7 +897,6 @@ QPEL_MC(0, avg_ , _ , op_avg)
#undef op_put #undef op_put
#undef op_put_no_rnd #undef op_put_no_rnd
#if 1
#define H264_LOWPASS(OPNAME, OP, OP2) \ #define H264_LOWPASS(OPNAME, OP, OP2) \
static inline void OPNAME ## h264_qpel_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\ static inline void OPNAME ## h264_qpel_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
@ -1298,7 +1297,6 @@ H264_MC(avg_, 16)
#undef op_put #undef op_put
#undef op2_avg #undef op2_avg
#undef op2_put #undef op2_put
#endif
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

View File

@ -3293,10 +3293,8 @@ static void iterative_me(SnowContext *s){
} }
best_rd= ref_rd; best_rd= ref_rd;
*block= ref_b; *block= ref_b;
#if 1
check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd); check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
//FIXME RD style color selection //FIXME RD style color selection
#endif
if(!same_block(block, &backup)){ if(!same_block(block, &backup)){
if(tb ) tb ->type &= ~BLOCK_OPT; if(tb ) tb ->type &= ~BLOCK_OPT;
if(lb ) lb ->type &= ~BLOCK_OPT; if(lb ) lb ->type &= ~BLOCK_OPT;

View File

@ -684,9 +684,6 @@ static int svq1_decode_frame(AVCodecContext *avctx,
//this should be removed after libavcodec can handle more flexible picture types & ordering //this should be removed after libavcodec can handle more flexible picture types & ordering
if(s->pict_type==FF_B_TYPE && s->last_picture_ptr==NULL) return buf_size; if(s->pict_type==FF_B_TYPE && s->last_picture_ptr==NULL) return buf_size;
#if FF_API_HURRY_UP
if(avctx->hurry_up && s->pict_type==FF_B_TYPE) return buf_size;
#endif
if( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==FF_B_TYPE) if( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==FF_B_TYPE)
||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=FF_I_TYPE) ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=FF_I_TYPE)
|| avctx->skip_frame >= AVDISCARD_ALL) || avctx->skip_frame >= AVDISCARD_ALL)

View File

@ -952,14 +952,6 @@ static int svq3_decode_frame(AVCodecContext *avctx,
/* Skip B-frames if we do not have reference frames. */ /* Skip B-frames if we do not have reference frames. */
if (s->last_picture_ptr == NULL && s->pict_type == FF_B_TYPE) if (s->last_picture_ptr == NULL && s->pict_type == FF_B_TYPE)
return 0; return 0;
#if FF_API_HURRY_UP
/* Skip B-frames if we are in a hurry. */
if (avctx->hurry_up && s->pict_type == FF_B_TYPE)
return 0;
/* Skip everything if we are in a hurry >= 5. */
if (avctx->hurry_up >= 5)
return 0;
#endif
if ( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == FF_B_TYPE) if ( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == FF_B_TYPE)
||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type != FF_I_TYPE) ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type != FF_I_TYPE)
|| avctx->skip_frame >= AVDISCARD_ALL) || avctx->skip_frame >= AVDISCARD_ALL)

View File

@ -1105,18 +1105,11 @@ void avcodec_default_free_buffers(AVCodecContext *s){
s->internal_buffer_count=0; s->internal_buffer_count=0;
} }
#if FF_API_OLD_FF_PICT_TYPES
char av_get_pict_type_char(int pict_type){ char av_get_pict_type_char(int pict_type){
switch(pict_type){ return av_get_picture_type_char(pict_type);
case FF_I_TYPE: return 'I';
case FF_P_TYPE: return 'P';
case FF_B_TYPE: return 'B';
case FF_S_TYPE: return 'S';
case FF_SI_TYPE:return 'i';
case FF_SP_TYPE:return 'p';
case FF_BI_TYPE:return 'b';
default: return '?';
}
} }
#endif
int av_get_bits_per_sample(enum CodecID codec_id){ int av_get_bits_per_sample(enum CodecID codec_id){
switch(codec_id){ switch(codec_id){

View File

@ -3519,21 +3519,11 @@ static int vc1_decode_frame(AVCodecContext *avctx,
if(s->last_picture_ptr==NULL && (s->pict_type==FF_B_TYPE || s->dropable)){ if(s->last_picture_ptr==NULL && (s->pict_type==FF_B_TYPE || s->dropable)){
goto err; goto err;
} }
#if FF_API_HURRY_UP
/* skip b frames if we are in a hurry */
if(avctx->hurry_up && s->pict_type==FF_B_TYPE) return -1;//buf_size;
#endif
if( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==FF_B_TYPE) if( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==FF_B_TYPE)
|| (avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=FF_I_TYPE) || (avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=FF_I_TYPE)
|| avctx->skip_frame >= AVDISCARD_ALL) { || avctx->skip_frame >= AVDISCARD_ALL) {
goto end; goto end;
} }
#if FF_API_HURRY_UP
/* skip everything if we are in a hurry>=5 */
if(avctx->hurry_up>=5) {
goto err;
}
#endif
if(s->next_p_frame_damaged){ if(s->next_p_frame_damaged){
if(s->pict_type==FF_B_TYPE) if(s->pict_type==FF_B_TYPE)

View File

@ -22,7 +22,7 @@
#define LIBAVCODEC_VERSION_MAJOR 53 #define LIBAVCODEC_VERSION_MAJOR 53
#define LIBAVCODEC_VERSION_MINOR 1 #define LIBAVCODEC_VERSION_MINOR 1
#define LIBAVCODEC_VERSION_MICRO 0 #define LIBAVCODEC_VERSION_MICRO 1
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
LIBAVCODEC_VERSION_MINOR, \ LIBAVCODEC_VERSION_MINOR, \
@ -47,15 +47,6 @@
#ifndef FF_API_OLD_AUDIOCONVERT #ifndef FF_API_OLD_AUDIOCONVERT
#define FF_API_OLD_AUDIOCONVERT (LIBAVCODEC_VERSION_MAJOR < 54) #define FF_API_OLD_AUDIOCONVERT (LIBAVCODEC_VERSION_MAJOR < 54)
#endif #endif
#ifndef FF_API_HURRY_UP
#define FF_API_HURRY_UP (LIBAVCODEC_VERSION_MAJOR < 53)
#endif
#ifndef FF_API_RATE_EMU
#define FF_API_RATE_EMU (LIBAVCODEC_VERSION_MAJOR < 53)
#endif
#ifndef FF_API_MB_Q
#define FF_API_MB_Q (LIBAVCODEC_VERSION_MAJOR < 53)
#endif
#ifndef FF_API_ANTIALIAS_ALGO #ifndef FF_API_ANTIALIAS_ALGO
#define FF_API_ANTIALIAS_ALGO (LIBAVCODEC_VERSION_MAJOR < 54) #define FF_API_ANTIALIAS_ALGO (LIBAVCODEC_VERSION_MAJOR < 54)
#endif #endif
@ -68,5 +59,8 @@
#ifndef FF_API_THREAD_INIT #ifndef FF_API_THREAD_INIT
#define FF_API_THREAD_INIT (LIBAVCODEC_VERSION_MAJOR < 54) #define FF_API_THREAD_INIT (LIBAVCODEC_VERSION_MAJOR < 54)
#endif #endif
#ifndef FF_API_OLD_FF_PICT_TYPES
#define FF_API_OLD_FF_PICT_TYPES (LIBAVCODEC_VERSION_MAJOR < 54)
#endif
#endif /* AVCODEC_VERSION_H */ #endif /* AVCODEC_VERSION_H */

View File

@ -113,15 +113,15 @@ typedef struct WMACodecContext {
uint8_t ms_stereo; ///< true if mid/side stereo mode uint8_t ms_stereo; ///< true if mid/side stereo mode
uint8_t channel_coded[MAX_CHANNELS]; ///< true if channel is coded uint8_t channel_coded[MAX_CHANNELS]; ///< true if channel is coded
int exponents_bsize[MAX_CHANNELS]; ///< log2 ratio frame/exp. length int exponents_bsize[MAX_CHANNELS]; ///< log2 ratio frame/exp. length
DECLARE_ALIGNED(16, float, exponents)[MAX_CHANNELS][BLOCK_MAX_SIZE]; DECLARE_ALIGNED(32, float, exponents)[MAX_CHANNELS][BLOCK_MAX_SIZE];
float max_exponent[MAX_CHANNELS]; float max_exponent[MAX_CHANNELS];
WMACoef coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE]; WMACoef coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
DECLARE_ALIGNED(16, float, coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE]; DECLARE_ALIGNED(32, float, coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE];
DECLARE_ALIGNED(16, FFTSample, output)[BLOCK_MAX_SIZE * 2]; DECLARE_ALIGNED(32, FFTSample, output)[BLOCK_MAX_SIZE * 2];
FFTContext mdct_ctx[BLOCK_NB_SIZES]; FFTContext mdct_ctx[BLOCK_NB_SIZES];
float *windows[BLOCK_NB_SIZES]; float *windows[BLOCK_NB_SIZES];
/* output buffer for one frame and the last for IMDCT windowing */ /* output buffer for one frame and the last for IMDCT windowing */
DECLARE_ALIGNED(16, float, frame_out)[MAX_CHANNELS][BLOCK_MAX_SIZE * 2]; DECLARE_ALIGNED(32, float, frame_out)[MAX_CHANNELS][BLOCK_MAX_SIZE * 2];
/* last frame info */ /* last frame info */
uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */ uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */
int last_bitoffset; int last_bitoffset;

View File

@ -145,7 +145,7 @@ typedef struct {
uint8_t table_idx; ///< index in sf_offsets for the scale factor reference block uint8_t table_idx; ///< index in sf_offsets for the scale factor reference block
float* coeffs; ///< pointer to the subframe decode buffer float* coeffs; ///< pointer to the subframe decode buffer
uint16_t num_vec_coeffs; ///< number of vector coded coefficients uint16_t num_vec_coeffs; ///< number of vector coded coefficients
DECLARE_ALIGNED(16, float, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer DECLARE_ALIGNED(32, float, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
} WMAProChannelCtx; } WMAProChannelCtx;
/** /**
@ -170,7 +170,7 @@ typedef struct WMAProDecodeCtx {
FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
PutBitContext pb; ///< context for filling the frame_data buffer PutBitContext pb; ///< context for filling the frame_data buffer
FFTContext mdct_ctx[WMAPRO_BLOCK_SIZES]; ///< MDCT context per block size FFTContext mdct_ctx[WMAPRO_BLOCK_SIZES]; ///< MDCT context per block size
DECLARE_ALIGNED(16, float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer DECLARE_ALIGNED(32, float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
float* windows[WMAPRO_BLOCK_SIZES]; ///< windows for the different block sizes float* windows[WMAPRO_BLOCK_SIZES]; ///< windows for the different block sizes
/* frame size dependent frame information (set during initialization) */ /* frame size dependent frame information (set during initialization) */

View File

@ -275,11 +275,11 @@ typedef struct {
///< by postfilter ///< by postfilter
float denoise_filter_cache[MAX_FRAMESIZE]; float denoise_filter_cache[MAX_FRAMESIZE];
int denoise_filter_cache_size; ///< samples in #denoise_filter_cache int denoise_filter_cache_size; ///< samples in #denoise_filter_cache
DECLARE_ALIGNED(16, float, tilted_lpcs_pf)[0x80]; DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80];
///< aligned buffer for LPC tilting ///< aligned buffer for LPC tilting
DECLARE_ALIGNED(16, float, denoise_coeffs_pf)[0x80]; DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80];
///< aligned buffer for denoise coefficients ///< aligned buffer for denoise coefficients
DECLARE_ALIGNED(16, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16]; DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
///< aligned buffer for postfilter speech ///< aligned buffer for postfilter speech
///< synthesis ///< synthesis
/** /**

View File

@ -25,7 +25,14 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
{ {
#if HAVE_YASM #if HAVE_YASM
int has_vectors = av_get_cpu_flags(); int has_vectors = av_get_cpu_flags();
if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) { if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX && s->nbits >= 5) {
/* AVX for SB */
s->imdct_calc = ff_imdct_calc_sse;
s->imdct_half = ff_imdct_half_avx;
s->fft_permute = ff_fft_permute_sse;
s->fft_calc = ff_fft_calc_avx;
s->fft_permutation = FF_FFT_PERM_AVX;
} else if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) {
/* SSE for P3/P4/K8 */ /* SSE for P3/P4/K8 */
s->imdct_calc = ff_imdct_calc_sse; s->imdct_calc = ff_imdct_calc_sse;
s->imdct_half = ff_imdct_half_sse; s->imdct_half = ff_imdct_half_sse;

View File

@ -22,6 +22,7 @@
#include "libavcodec/fft.h" #include "libavcodec/fft.h"
void ff_fft_permute_sse(FFTContext *s, FFTComplex *z); void ff_fft_permute_sse(FFTContext *s, FFTComplex *z);
void ff_fft_calc_avx(FFTContext *s, FFTComplex *z);
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z); void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z);
void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z); void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z);
@ -32,6 +33,7 @@ void ff_imdct_calc_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input
void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_dct32_float_sse(FFTSample *out, const FFTSample *in); void ff_dct32_float_sse(FFTSample *out, const FFTSample *in);
#endif #endif

View File

@ -1,6 +1,7 @@
;****************************************************************************** ;******************************************************************************
;* FFT transform with SSE/3DNow optimizations ;* FFT transform with SSE/3DNow optimizations
;* Copyright (c) 2008 Loren Merritt ;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2011 Vitor Sessak
;* ;*
;* This algorithm (though not any of the implementation details) is ;* This algorithm (though not any of the implementation details) is
;* based on libdjbfft by D. J. Bernstein. ;* based on libdjbfft by D. J. Bernstein.
@ -49,9 +50,21 @@ endstruc
SECTION_RODATA SECTION_RODATA
%define M_SQRT1_2 0.70710678118654752440 %define M_SQRT1_2 0.70710678118654752440
ps_root2: times 4 dd M_SQRT1_2 %define M_COS_PI_1_8 0.923879532511287
ps_root2mppm: dd -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2 %define M_COS_PI_3_8 0.38268343236509
ps_p1p1m1p1: dd 0, 0, 1<<31, 0
align 32
ps_cos16_1: dd 1.0, M_COS_PI_1_8, M_SQRT1_2, M_COS_PI_3_8, 1.0, M_COS_PI_1_8, M_SQRT1_2, M_COS_PI_3_8
ps_cos16_2: dd 0, M_COS_PI_3_8, M_SQRT1_2, M_COS_PI_1_8, 0, -M_COS_PI_3_8, -M_SQRT1_2, -M_COS_PI_1_8
ps_root2: times 8 dd M_SQRT1_2
ps_root2mppm: dd -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2
ps_p1p1m1p1: dd 0, 0, 1<<31, 0, 0, 0, 1<<31, 0
perm1: dd 0x00, 0x02, 0x03, 0x01, 0x03, 0x00, 0x02, 0x01
perm2: dd 0x00, 0x01, 0x02, 0x03, 0x01, 0x00, 0x02, 0x03
ps_p1p1m1p1root2: dd 1.0, 1.0, -1.0, 1.0, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2
ps_m1m1p1m1p1m1m1m1: dd 1<<31, 1<<31, 0, 1<<31, 0, 1<<31, 1<<31, 1<<31
ps_m1p1: dd 1<<31, 0 ps_m1p1: dd 1<<31, 0
%assign i 16 %assign i 16
@ -96,51 +109,80 @@ section .text align=16
SWAP %3, %6 SWAP %3, %6
%endmacro %endmacro
; in: %1={r0,i0,r2,i2} %2={r1,i1,r3,i3} ; in: %1 = {r0,i0,r2,i2,r4,i4,r6,i6}
; out: %1={r0,r1,r2,r3} %2={i0,i1,i2,i3} ; %2 = {r1,i1,r3,i3,r5,i5,r7,i7}
%macro T4_SSE 3 ; %3, %4, %5 tmp
mova %3, %1 ; out: %1 = {r0,r1,r2,r3,i0,i1,i2,i3}
addps %1, %2 ; {t1,t2,t6,t5} ; %2 = {r4,r5,r6,r7,i4,i5,i6,i7}
subps %3, %2 ; {t3,t4,-t8,t7} %macro T8_AVX 5
xorps %3, [ps_p1p1m1p1] vsubps %5, %1, %2 ; v = %1 - %2
mova %2, %1 vaddps %3, %1, %2 ; w = %1 + %2
shufps %1, %3, 0x44 ; {t1,t2,t3,t4} vmulps %2, %5, [ps_p1p1m1p1root2] ; v *= vals1
shufps %2, %3, 0xbe ; {t6,t5,t7,t8} vpermilps %2, %2, [perm1]
mova %3, %1 vblendps %1, %2, %3, 0x33 ; q = {w1,w2,v4,v2,w5,w6,v7,v6}
addps %1, %2 ; {r0,i0,r1,i1} vshufps %5, %3, %2, 0x4e ; r = {w3,w4,v1,v3,w7,w8,v8,v5}
subps %3, %2 ; {r2,i2,r3,i3} vsubps %4, %5, %1 ; s = r - q
mova %2, %1 vaddps %1, %5, %1 ; u = r + q
shufps %1, %3, 0x88 ; {r0,r1,r2,r3} vpermilps %1, %1, [perm2] ; k = {u1,u2,u3,u4,u6,u5,u7,u8}
shufps %2, %3, 0xdd ; {i0,i1,i2,i3} vshufps %5, %4, %1, 0xbb
vshufps %3, %4, %1, 0xee
vperm2f128 %3, %3, %5, 0x13
vxorps %4, %4, [ps_m1m1p1m1p1m1m1m1] ; s *= {1,1,-1,-1,1,-1,-1,-1}
vshufps %2, %1, %4, 0xdd
vshufps %1, %1, %4, 0x88
vperm2f128 %4, %2, %1, 0x02 ; v = {k1,k3,s1,s3,k2,k4,s2,s4}
vperm2f128 %1, %1, %2, 0x13 ; w = {k6,k8,s6,s8,k5,k7,s5,s7}
vsubps %5, %1, %3
vblendps %1, %5, %1, 0x55 ; w -= {0,s7,0,k7,0,s8,0,k8}
vsubps %2, %4, %1 ; %2 = v - w
vaddps %1, %4, %1 ; %1 = v + w
%endmacro %endmacro
; In SSE mode do one fft4 transform
; in: %1={r0,i0,r2,i2} %2={r1,i1,r3,i3}
; out: %1={r0,r1,r2,r3} %2={i0,i1,i2,i3}
;
; In AVX mode do two fft4 transforms
; in: %1={r0,i0,r2,i2,r4,i4,r6,i6} %2={r1,i1,r3,i3,r5,i5,r7,i7}
; out: %1={r0,r1,r2,r3,r4,r5,r6,r7} %2={i0,i1,i2,i3,i4,i5,i6,i7}
%macro T4_SSE 3
subps %3, %1, %2 ; {t3,t4,-t8,t7}
addps %1, %1, %2 ; {t1,t2,t6,t5}
xorps %3, %3, [ps_p1p1m1p1]
shufps %2, %1, %3, 0xbe ; {t6,t5,t7,t8}
shufps %1, %1, %3, 0x44 ; {t1,t2,t3,t4}
subps %3, %1, %2 ; {r2,i2,r3,i3}
addps %1, %1, %2 ; {r0,i0,r1,i1}
shufps %2, %1, %3, 0xdd ; {i0,i1,i2,i3}
shufps %1, %1, %3, 0x88 ; {r0,r1,r2,r3}
%endmacro
; In SSE mode do one FFT8
; in: %1={r0,r1,r2,r3} %2={i0,i1,i2,i3} %3={r4,i4,r6,i6} %4={r5,i5,r7,i7} ; in: %1={r0,r1,r2,r3} %2={i0,i1,i2,i3} %3={r4,i4,r6,i6} %4={r5,i5,r7,i7}
; out: %1={r0,r1,r2,r3} %2={i0,i1,i2,i3} %3={r4,r5,r6,r7} %4={i4,i5,i6,i7} ; out: %1={r0,r1,r2,r3} %2={i0,i1,i2,i3} %3={r4,r5,r6,r7} %4={i4,i5,i6,i7}
;
; In AVX mode do two FFT8
; in: %1={r0,i0,r2,i2,r8, i8, r10,i10} %2={r1,i1,r3,i3,r9, i9, r11,i11}
; %3={r4,i4,r6,i6,r12,i12,r14,i14} %4={r5,i5,r7,i7,r13,i13,r15,i15}
; out: %1={r0,r1,r2,r3,r8, r9, r10,r11} %2={i0,i1,i2,i3,i8, i9, i10,i11}
; %3={r4,r5,r6,r7,r12,r13,r14,r15} %4={i4,i5,i6,i7,i12,i13,i14,i15}
%macro T8_SSE 6 %macro T8_SSE 6
mova %6, %3 addps %6, %3, %4 ; {t1,t2,t3,t4}
subps %3, %4 ; {r5,i5,r7,i7} subps %3, %3, %4 ; {r5,i5,r7,i7}
addps %6, %4 ; {t1,t2,t3,t4} shufps %4, %3, %3, 0xb1 ; {i5,r5,i7,r7}
mova %4, %3 mulps %3, %3, [ps_root2mppm] ; {-r5,i5,r7,-i7}
shufps %4, %4, 0xb1 ; {i5,r5,i7,r7} mulps %4, %4, [ps_root2]
mulps %3, [ps_root2mppm] ; {-r5,i5,r7,-i7} addps %3, %3, %4 ; {t8,t7,ta,t9}
mulps %4, [ps_root2] shufps %4, %6, %3, 0x9c ; {t1,t4,t7,ta}
addps %3, %4 ; {t8,t7,ta,t9} shufps %6, %6, %3, 0x36 ; {t3,t2,t9,t8}
mova %4, %6 subps %3, %6, %4 ; {t6,t5,tc,tb}
shufps %6, %3, 0x36 ; {t3,t2,t9,t8} addps %6, %6, %4 ; {t1,t2,t9,ta}
shufps %4, %3, 0x9c ; {t1,t4,t7,ta} shufps %5, %6, %3, 0x8d ; {t2,ta,t6,tc}
mova %3, %6 shufps %6, %6, %3, 0xd8 ; {t1,t9,t5,tb}
addps %6, %4 ; {t1,t2,t9,ta} subps %3, %1, %6 ; {r4,r5,r6,r7}
subps %3, %4 ; {t6,t5,tc,tb} addps %1, %1, %6 ; {r0,r1,r2,r3}
mova %4, %6 subps %4, %2, %5 ; {i4,i5,i6,i7}
shufps %6, %3, 0xd8 ; {t1,t9,t5,tb} addps %2, %2, %5 ; {i0,i1,i2,i3}
shufps %4, %3, 0x8d ; {t2,ta,t6,tc}
mova %3, %1
mova %5, %2
addps %1, %6 ; {r0,r1,r2,r3}
addps %2, %4 ; {i0,i1,i2,i3}
subps %3, %6 ; {r4,r5,r6,r7}
subps %5, %4 ; {i4,i5,i6,i7}
SWAP %4, %5
%endmacro %endmacro
; scheduled for cpu-bound sizes ; scheduled for cpu-bound sizes
@ -148,52 +190,44 @@ section .text align=16
IF%1 mova m4, Z(4) IF%1 mova m4, Z(4)
IF%1 mova m5, Z(5) IF%1 mova m5, Z(5)
mova m0, %2 ; wre mova m0, %2 ; wre
mova m2, m4
mova m1, %3 ; wim mova m1, %3 ; wim
mova m3, m5 mulps m2, m4, m0 ; r2*wre
mulps m2, m0 ; r2*wre
IF%1 mova m6, Z2(6) IF%1 mova m6, Z2(6)
mulps m3, m1 ; i2*wim mulps m3, m5, m1 ; i2*wim
IF%1 mova m7, Z2(7) IF%1 mova m7, Z2(7)
mulps m4, m1 ; r2*wim mulps m4, m4, m1 ; r2*wim
mulps m5, m0 ; i2*wre mulps m5, m5, m0 ; i2*wre
addps m2, m3 ; r2*wre + i2*wim addps m2, m2, m3 ; r2*wre + i2*wim
mova m3, m1 mulps m3, m1, m7 ; i3*wim
mulps m1, m6 ; r3*wim subps m5, m5, m4 ; i2*wre - r2*wim
subps m5, m4 ; i2*wre - r2*wim mulps m1, m1, m6 ; r3*wim
mova m4, m0 mulps m4, m0, m6 ; r3*wre
mulps m3, m7 ; i3*wim mulps m0, m0, m7 ; i3*wre
mulps m4, m6 ; r3*wre subps m4, m4, m3 ; r3*wre - i3*wim
mulps m0, m7 ; i3*wre
subps m4, m3 ; r3*wre - i3*wim
mova m3, Z(0) mova m3, Z(0)
addps m0, m1 ; i3*wre + r3*wim addps m0, m0, m1 ; i3*wre + r3*wim
mova m1, m4 subps m1, m4, m2 ; t3
addps m4, m2 ; t5 addps m4, m4, m2 ; t5
subps m1, m2 ; t3 subps m3, m3, m4 ; r2
subps m3, m4 ; r2 addps m4, m4, Z(0) ; r0
addps m4, Z(0) ; r0
mova m6, Z(2) mova m6, Z(2)
mova Z(4), m3 mova Z(4), m3
mova Z(0), m4 mova Z(0), m4
mova m3, m5 subps m3, m5, m0 ; t4
subps m5, m0 ; t4 subps m4, m6, m3 ; r3
mova m4, m6 addps m3, m3, m6 ; r1
subps m6, m5 ; r3 mova Z2(6), m4
addps m5, m4 ; r1 mova Z(2), m3
mova Z2(6), m6
mova Z(2), m5
mova m2, Z(3) mova m2, Z(3)
addps m3, m0 ; t6 addps m3, m5, m0 ; t6
subps m2, m1 ; i3 subps m2, m2, m1 ; i3
mova m7, Z(1) mova m7, Z(1)
addps m1, Z(3) ; i1 addps m1, m1, Z(3) ; i1
mova Z2(7), m2 mova Z2(7), m2
mova Z(3), m1 mova Z(3), m1
mova m4, m7 subps m4, m7, m3 ; i2
subps m7, m3 ; i2 addps m3, m3, m7 ; i0
addps m3, m4 ; i0 mova Z(5), m4
mova Z(5), m7
mova Z(1), m3 mova Z(1), m3
%endmacro %endmacro
@ -201,77 +235,55 @@ IF%1 mova m7, Z2(7)
%macro PASS_BIG 1 ; (!interleave) %macro PASS_BIG 1 ; (!interleave)
mova m4, Z(4) ; r2 mova m4, Z(4) ; r2
mova m5, Z(5) ; i2 mova m5, Z(5) ; i2
mova m2, m4
mova m0, [wq] ; wre mova m0, [wq] ; wre
mova m3, m5
mova m1, [wq+o1q] ; wim mova m1, [wq+o1q] ; wim
mulps m2, m0 ; r2*wre mulps m2, m4, m0 ; r2*wre
mova m6, Z2(6) ; r3 mova m6, Z2(6) ; r3
mulps m3, m1 ; i2*wim mulps m3, m5, m1 ; i2*wim
mova m7, Z2(7) ; i3 mova m7, Z2(7) ; i3
mulps m4, m1 ; r2*wim mulps m4, m4, m1 ; r2*wim
mulps m5, m0 ; i2*wre mulps m5, m5, m0 ; i2*wre
addps m2, m3 ; r2*wre + i2*wim addps m2, m2, m3 ; r2*wre + i2*wim
mova m3, m1 mulps m3, m1, m7 ; i3*wim
mulps m1, m6 ; r3*wim mulps m1, m1, m6 ; r3*wim
subps m5, m4 ; i2*wre - r2*wim subps m5, m5, m4 ; i2*wre - r2*wim
mova m4, m0 mulps m4, m0, m6 ; r3*wre
mulps m3, m7 ; i3*wim mulps m0, m0, m7 ; i3*wre
mulps m4, m6 ; r3*wre subps m4, m4, m3 ; r3*wre - i3*wim
mulps m0, m7 ; i3*wre
subps m4, m3 ; r3*wre - i3*wim
mova m3, Z(0) mova m3, Z(0)
addps m0, m1 ; i3*wre + r3*wim addps m0, m0, m1 ; i3*wre + r3*wim
mova m1, m4 subps m1, m4, m2 ; t3
addps m4, m2 ; t5 addps m4, m4, m2 ; t5
subps m1, m2 ; t3 subps m3, m3, m4 ; r2
subps m3, m4 ; r2 addps m4, m4, Z(0) ; r0
addps m4, Z(0) ; r0
mova m6, Z(2) mova m6, Z(2)
mova Z(4), m3 mova Z(4), m3
mova Z(0), m4 mova Z(0), m4
mova m3, m5 subps m3, m5, m0 ; t4
subps m5, m0 ; t4 subps m4, m6, m3 ; r3
mova m4, m6 addps m3, m3, m6 ; r1
subps m6, m5 ; r3 IF%1 mova Z2(6), m4
addps m5, m4 ; r1 IF%1 mova Z(2), m3
IF%1 mova Z2(6), m6
IF%1 mova Z(2), m5
mova m2, Z(3) mova m2, Z(3)
addps m3, m0 ; t6 addps m5, m5, m0 ; t6
subps m2, m1 ; i3 subps m2, m2, m1 ; i3
mova m7, Z(1) mova m7, Z(1)
addps m1, Z(3) ; i1 addps m1, m1, Z(3) ; i1
IF%1 mova Z2(7), m2 IF%1 mova Z2(7), m2
IF%1 mova Z(3), m1 IF%1 mova Z(3), m1
mova m4, m7 subps m6, m7, m5 ; i2
subps m7, m3 ; i2 addps m5, m5, m7 ; i0
addps m3, m4 ; i0 IF%1 mova Z(5), m6
IF%1 mova Z(5), m7 IF%1 mova Z(1), m5
IF%1 mova Z(1), m3
%if %1==0 %if %1==0
mova m4, m5 ; r1 INTERL m1, m3, m7, Z, 2
mova m0, m6 ; r3 INTERL m2, m4, m0, Z2, 6
unpcklps m5, m1
unpckhps m4, m1
unpcklps m6, m2
unpckhps m0, m2
mova m1, Z(0) mova m1, Z(0)
mova m2, Z(4) mova m2, Z(4)
mova Z(2), m5
mova Z(3), m4 INTERL m5, m1, m3, Z, 0
mova Z2(6), m6 INTERL m6, m2, m7, Z, 4
mova Z2(7), m0
mova m5, m1 ; r0
mova m4, m2 ; r2
unpcklps m1, m3
unpckhps m5, m3
unpcklps m2, m7
unpckhps m4, m7
mova Z(0), m1
mova Z(1), m5
mova Z(4), m2
mova Z(5), m4
%endif %endif
%endmacro %endmacro
@ -281,13 +293,106 @@ IF%1 mova Z(1), m3
punpckhdq %3, %2 punpckhdq %3, %2
%endmacro %endmacro
INIT_XMM
%define mova movaps
%define Z(x) [r0+mmsize*x] %define Z(x) [r0+mmsize*x]
%define Z2(x) [r0+mmsize*x] %define Z2(x) [r0+mmsize*x]
%define ZH(x) [r0+mmsize*x+mmsize/2]
INIT_YMM
align 16 align 16
; fft8_avx: loads the two vectors stored at Z(0) and Z(1), runs the T8_AVX
; macro on them (m2-m4 are handed over as its remaining arguments) and writes
; m0/m1 back to the same two slots.
; NOTE(review): T8_AVX is defined outside this view; presumably it performs
; the whole 8-point transform in registers -- confirm.
fft8_avx:
mova m0, Z(0)
mova m1, Z(1)
T8_AVX m0, m1, m2, m3, m4
mova Z(0), m0
mova Z(1), m1
ret
align 16
; fft16_avx: applies T4_SSE to Z(2)/Z(3) and T8_AVX to Z(0)/Z(1), multiplies
; the T4 result by the ps_cos16_1 / ps_cos16_2 constants, combines both
; halves with adds/subs and stores the result to Z(0)..Z(3).
; NOTE(review): T4_SSE and T8_AVX are defined outside this view; the data
; layout they leave in the registers is assumed here, not verified.
fft16_avx:
mova m2, Z(2)
mova m3, Z(3)
T4_SSE m2, m3, m7
mova m0, Z(0)
mova m1, Z(1)
T8_AVX m0, m1, m4, m5, m7
; twiddle multiply:
;   m7 = m3*cos16_2 + m2*cos16_1
;   m3 = m3*cos16_1 - m2*cos16_2
mova m4, [ps_cos16_1]
mova m5, [ps_cos16_2]
vmulps m6, m2, m4
vmulps m7, m3, m5
vaddps m7, m7, m6
vmulps m2, m2, m5
vmulps m3, m3, m4
vsubps m3, m3, m2
; regroup 128-bit lanes: m2 = {m7.lo, m3.hi}, m3 = {m7.hi, m3.lo}
vblendps m2, m7, m3, 0xf0
vperm2f128 m3, m7, m3, 0x21
vaddps m4, m2, m3
vsubps m2, m3, m2
; 0x01 swaps the two 128-bit lanes of m2
vperm2f128 m2, m2, m2, 0x01
; final add/sub against the T8_AVX outputs held in m0/m1
vsubps m3, m1, m2
vaddps m1, m1, m2
vsubps m5, m0, m4
vaddps m0, m0, m4
; store as 128-bit halves: Z(n) receives a half of m0 (or m5) and
; ZH(n), i.e. Z(n)+mmsize/2, the matching half of m1 (or m3)
vextractf128 Z(0), m0, 0
vextractf128 ZH(0), m1, 0
vextractf128 Z(1), m0, 1
vextractf128 ZH(1), m1, 1
vextractf128 Z(2), m5, 0
vextractf128 ZH(2), m3, 0
vextractf128 Z(3), m5, 1
vextractf128 ZH(3), m3, 1
ret
align 16
; fft32_avx: calls fft16_avx on the buffer at r0, runs T4_SSE / T8_SSE on
; Z(4)..Z(7), regroups the results into m4-m7 and finishes with PASS_SMALL
; using the cos_32 table.
fft32_avx:
call fft16_avx
mova m0, Z(4)
mova m1, Z(5)
T4_SSE m0, m1, m4
mova m2, Z(6)
mova m3, Z(7)
T8_SSE m0, m1, m2, m3, m4, m6
; m0={r0,r1,r2,r3,r8, r9, r10,r11} m1={i0,i1,i2,i3,i8, i9, i10,i11}
; m2={r4,r5,r6,r7,r12,r13,r14,r15} m3={i4,i5,i6,i7,i12,i13,i14,i15}
; 0x20 gathers the low 128-bit lanes of both sources, 0x31 the high lanes
vperm2f128 m4, m0, m2, 0x20
vperm2f128 m5, m1, m3, 0x20
vperm2f128 m6, m0, m2, 0x31
vperm2f128 m7, m1, m3, 0x31
; NOTE(review): the first argument 0 presumably disables PASS_SMALL's own
; loads of m4-m7 (its IF%1 lines), which is why they are prepared above --
; confirm against the IF0/IF1 definitions.
PASS_SMALL 0, [cos_32], [cos_32+32]
ret
; fft32_interleave_avx: runs fft32_avx, then walks the buffer and interleaves
; each pair of vectors Z(0)/Z(1) element-wise with unpcklps/unpckhps.
; The counter r2d starts at 32 and drops by mmsize/4 per iteration while r0
; advances by mmsize*2 bytes; r0 is left advanced when the routine returns.
fft32_interleave_avx:
call fft32_avx
mov r2d, 32
.deint_loop:
mova m2, Z(0)
mova m3, Z(1)
vunpcklps m0, m2, m3
vunpckhps m1, m2, m3
; the unpack instructions work per 128-bit lane, so the halves are written
; out individually: low halves of m0/m1 first, then the high halves
vextractf128 Z(0), m0, 0
vextractf128 ZH(0), m1, 0
vextractf128 Z(1), m0, 1
vextractf128 ZH(1), m1, 1
add r0, mmsize*2
sub r2d, mmsize/4
jg .deint_loop
ret
INIT_XMM
%define movdqa movaps
align 16
fft4_avx:
fft4_sse: fft4_sse:
mova m0, Z(0) mova m0, Z(0)
mova m1, Z(1) mova m1, Z(1)
@ -406,6 +511,8 @@ FFT48_3DN _3dn
%define Z(x) [zq + o1q*(x&6) + mmsize*(x&1)] %define Z(x) [zq + o1q*(x&6) + mmsize*(x&1)]
%define Z2(x) [zq + o3q + mmsize*(x&1)] %define Z2(x) [zq + o3q + mmsize*(x&1)]
%define ZH(x) [zq + o1q*(x&6) + mmsize*(x&1) + mmsize/2]
%define Z2H(x) [zq + o3q + mmsize*(x&1) + mmsize/2]
%macro DECL_PASS 2+ ; name, payload %macro DECL_PASS 2+ ; name, payload
align 16 align 16
@ -423,8 +530,34 @@ DEFINE_ARGS z, w, n, o1, o3
rep ret rep ret
%endmacro %endmacro
INIT_YMM
; INTERL_AVX: interleave the elements of two ymm registers and store them.
;   %1, %2 : input registers (%2 is overwritten with the low unpack result)
;   %3     : register that receives the high unpack result
;   %4     : name of the addressing macro (Z or Z2); "%4 %+ H" selects the
;            matching half-offset variant (ZH / Z2H)
;   %5     : slot index passed to the addressing macro
; The results are written as four 128-bit halves to slots %5 and %5+1.
%macro INTERL_AVX 5
vunpckhps %3, %2, %1
vunpcklps %2, %2, %1
vextractf128 %4(%5), %2, 0
vextractf128 %4 %+ H(%5), %3, 0
vextractf128 %4(%5 + 1), %2, 1
vextractf128 %4 %+ H(%5 + 1), %3, 1
%endmacro
%define INTERL INTERL_AVX
DECL_PASS pass_avx, PASS_BIG 1
DECL_PASS pass_interleave_avx, PASS_BIG 0
INIT_XMM INIT_XMM
%define mova movaps
; INTERL_SSE: interleave the elements of two xmm registers and store them.
;   %1, %2 : input registers (%2 is overwritten with the low unpack result)
;   %3     : scratch register, receives a copy of %2 and then the high result
;   %4     : name of the addressing macro (Z or Z2)
;   %5     : slot index; the low result goes to slot %5, the high to %5+1
%macro INTERL_SSE 5
mova %3, %2
unpcklps %2, %1
unpckhps %3, %1
mova %4(%5), %2
mova %4(%5+1), %3
%endmacro
%define INTERL INTERL_SSE
DECL_PASS pass_sse, PASS_BIG 1 DECL_PASS pass_sse, PASS_BIG 1
DECL_PASS pass_interleave_sse, PASS_BIG 0 DECL_PASS pass_interleave_sse, PASS_BIG 0
@ -457,9 +590,12 @@ DECL_PASS pass_interleave_3dn, PASS_BIG 0
%macro DECL_FFT 2-3 ; nbits, cpu, suffix %macro DECL_FFT 2-3 ; nbits, cpu, suffix
%xdefine list_of_fft fft4%2 SECTION_REL, fft8%2 SECTION_REL %xdefine list_of_fft fft4%2 SECTION_REL, fft8%2 SECTION_REL
%if %1==5 %if %1>=5
%xdefine list_of_fft list_of_fft, fft16%2 SECTION_REL %xdefine list_of_fft list_of_fft, fft16%2 SECTION_REL
%endif %endif
%if %1>=6
%xdefine list_of_fft list_of_fft, fft32%3%2 SECTION_REL
%endif
%assign n 1<<%1 %assign n 1<<%1
%rep 17-%1 %rep 17-%1
@ -492,9 +628,14 @@ section .text
; The others pass args in registers and don't spill anything. ; The others pass args in registers and don't spill anything.
cglobal fft_dispatch%3%2, 2,5,8, z, nbits cglobal fft_dispatch%3%2, 2,5,8, z, nbits
FFT_DISPATCH %3%2, nbits FFT_DISPATCH %3%2, nbits
%ifidn %2, _avx
vzeroupper
%endif
RET RET
%endmacro ; DECL_FFT %endmacro ; DECL_FFT
DECL_FFT 6, _avx
DECL_FFT 6, _avx, _interleave
DECL_FFT 5, _sse DECL_FFT 5, _sse
DECL_FFT 5, _sse, _interleave DECL_FFT 5, _sse, _interleave
DECL_FFT 4, _3dn DECL_FFT 4, _3dn
@ -533,21 +674,53 @@ INIT_XMM
%endmacro %endmacro
%macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5 %macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5
movaps xmm6, [%4+%1*2] mulps m6, %3, [%5+%1]
movaps %2, [%4+%1*2+0x10] mulps m7, %2, [%5+%1]
movaps %3, xmm6 mulps %2, %2, [%6+%1]
movaps xmm7, %2 mulps %3, %3, [%6+%1]
mulps xmm6, [%5+%1] subps %2, %2, m6
mulps %2, [%6+%1] addps %3, %3, m7
mulps %3, [%6+%1] %endmacro
mulps xmm7, [%5+%1]
subps %2, xmm6 %macro POSROTATESHUF_AVX 5 ;j, k, z+n8, tcos+n8, tsin+n8
addps %3, xmm7 .post:
vmovaps ymm1, [%3+%1*2]
vmovaps ymm0, [%3+%1*2+0x20]
vmovaps ymm3, [%3+%2*2]
vmovaps ymm2, [%3+%2*2+0x20]
CMUL %1, ymm0, ymm1, %3, %4, %5
CMUL %2, ymm2, ymm3, %3, %4, %5
vshufps ymm1, ymm1, ymm1, 0x1b
vshufps ymm3, ymm3, ymm3, 0x1b
vperm2f128 ymm1, ymm1, ymm1, 0x01
vperm2f128 ymm3, ymm3, ymm3, 0x01
vunpcklps ymm6, ymm2, ymm1
vunpckhps ymm4, ymm2, ymm1
vunpcklps ymm7, ymm0, ymm3
vunpckhps ymm5, ymm0, ymm3
vextractf128 [%3+%1*2], ymm7, 0
vextractf128 [%3+%1*2+0x10], ymm5, 0
vextractf128 [%3+%1*2+0x20], ymm7, 1
vextractf128 [%3+%1*2+0x30], ymm5, 1
vextractf128 [%3+%2*2], ymm6, 0
vextractf128 [%3+%2*2+0x10], ymm4, 0
vextractf128 [%3+%2*2+0x20], ymm6, 1
vextractf128 [%3+%2*2+0x30], ymm4, 1
sub %2, 0x20
add %1, 0x20
jl .post
%endmacro %endmacro
%macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8 %macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8
.post: .post:
movaps xmm1, [%3+%1*2]
movaps xmm0, [%3+%1*2+0x10]
CMUL %1, xmm0, xmm1, %3, %4, %5 CMUL %1, xmm0, xmm1, %3, %4, %5
movaps xmm5, [%3+%2*2]
movaps xmm4, [%3+%2*2+0x10]
CMUL %2, xmm4, xmm5, %3, %4, %5 CMUL %2, xmm4, xmm5, %3, %4, %5
shufps xmm1, xmm1, 0x1b shufps xmm1, xmm1, 0x1b
shufps xmm5, xmm5, 0x1b shufps xmm5, xmm5, 0x1b
@ -566,7 +739,8 @@ INIT_XMM
jl .post jl .post
%endmacro %endmacro
cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *input %macro DECL_IMDCT 2
cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *input
%ifdef ARCH_X86_64 %ifdef ARCH_X86_64
%define rrevtab r10 %define rrevtab r10
%define rtcos r11 %define rtcos r11
@ -641,7 +815,7 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
mov r0, r1 mov r0, r1
mov r1d, [r5+FFTContext.nbits] mov r1d, [r5+FFTContext.nbits]
FFT_DISPATCH _sse, r1 FFT_DISPATCH %1, r1
mov r0d, [r5+FFTContext.mdctsize] mov r0d, [r5+FFTContext.mdctsize]
add r6, r0 add r6, r0
@ -653,14 +827,24 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
mov rtsin, [esp+4] mov rtsin, [esp+4]
%endif %endif
neg r0 neg r0
mov r1, -16 mov r1, -mmsize
sub r1, r0 sub r1, r0
POSROTATESHUF r0, r1, r6, rtcos, rtsin %2 r0, r1, r6, rtcos, rtsin
%ifdef ARCH_X86_64 %ifdef ARCH_X86_64
pop r14 pop r14
pop r13 pop r13
pop r12 pop r12
%else %else
add esp, 12 add esp, 12
%endif
%ifidn avx_enabled, 1
vzeroupper
%endif %endif
RET RET
%endmacro
DECL_IMDCT _sse, POSROTATESHUF
INIT_YMM
DECL_IMDCT _avx, POSROTATESHUF_AVX

View File

@ -28,6 +28,12 @@ DECLARE_ASM_CONST(16, int, ff_m1m1m1m1)[4] =
void ff_fft_dispatch_sse(FFTComplex *z, int nbits); void ff_fft_dispatch_sse(FFTComplex *z, int nbits);
void ff_fft_dispatch_interleave_sse(FFTComplex *z, int nbits); void ff_fft_dispatch_interleave_sse(FFTComplex *z, int nbits);
void ff_fft_dispatch_interleave_avx(FFTComplex *z, int nbits);
/**
 * AVX FFT entry point: forwards the buffer z and s->nbits to
 * ff_fft_dispatch_interleave_avx(), which does all the work.
 */
void ff_fft_calc_avx(FFTContext *s, FFTComplex *z)
{
ff_fft_dispatch_interleave_avx(z, s->nbits);
}
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) void ff_fft_calc_sse(FFTContext *s, FFTComplex *z)
{ {
@ -77,7 +83,7 @@ void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input)
long n = s->mdct_size; long n = s->mdct_size;
long n4 = n >> 2; long n4 = n >> 2;
ff_imdct_half_sse(s, output+n4, input); s->imdct_half(s, output + n4, input);
j = -n; j = -n;
k = n-16; k = n-16;

View File

@ -1,10 +1,11 @@
;***************************************************************************** ;*****************************************************************************
;* x86inc.asm ;* x86inc.asm
;***************************************************************************** ;*****************************************************************************
;* Copyright (C) 2005-2008 x264 project ;* Copyright (C) 2005-2011 x264 project
;* ;*
;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Authors: Loren Merritt <lorenm@u.washington.edu>
;* Anton Mitrofanov <BugMaster@narod.ru> ;* Anton Mitrofanov <BugMaster@narod.ru>
;* Jason Garrett-Glaser <darkshikari@gmail.com>
;* ;*
;* Permission to use, copy, modify, and/or distribute this software for any ;* Permission to use, copy, modify, and/or distribute this software for any
;* purpose with or without fee is hereby granted, provided that the above ;* purpose with or without fee is hereby granted, provided that the above
@ -499,6 +500,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%endmacro %endmacro
%macro INIT_MMX 0 %macro INIT_MMX 0
%assign avx_enabled 0
%define RESET_MM_PERMUTATION INIT_MMX %define RESET_MM_PERMUTATION INIT_MMX
%define mmsize 8 %define mmsize 8
%define num_mmregs 8 %define num_mmregs 8
@ -520,6 +522,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%endmacro %endmacro
%macro INIT_XMM 0 %macro INIT_XMM 0
%assign avx_enabled 0
%define RESET_MM_PERMUTATION INIT_XMM %define RESET_MM_PERMUTATION INIT_XMM
%define mmsize 16 %define mmsize 16
%define num_mmregs 8 %define num_mmregs 8
@ -538,6 +541,31 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%endrep %endrep
%endmacro %endmacro
; INIT_AVX: xmm-register setup with AVX emulation switched on.
; INIT_XMM is invoked first (it resets avx_enabled to 0), then avx_enabled is
; set to 1, PALIGNR is mapped to PALIGNR_SSSE3 and RESET_MM_PERMUTATION is
; pointed back at INIT_AVX.
%macro INIT_AVX 0
INIT_XMM
%assign avx_enabled 1
%define PALIGNR PALIGNR_SSSE3
%define RESET_MM_PERMUTATION INIT_AVX
%endmacro
; INIT_YMM: select 256-bit ymm registers for the code that follows.
; Sets avx_enabled to 1 and mmsize to 32, maps mova/movu to vmovaps/vmovups,
; and defines mN as ymmN (plus nymmN as N) for every available register.
%macro INIT_YMM 0
%assign avx_enabled 1
%define RESET_MM_PERMUTATION INIT_YMM
%define mmsize 32
; 8 vector registers by default, 16 when ARCH_X86_64 is defined
%define num_mmregs 8
%ifdef ARCH_X86_64
%define num_mmregs 16
%endif
%define mova vmovaps
%define movu vmovups
%assign %%i 0
%rep num_mmregs
CAT_XDEFINE m, %%i, ymm %+ %%i
CAT_XDEFINE nymm, %%i, %%i
%assign %%i %%i+1
%endrep
%endmacro
INIT_MMX INIT_MMX
; I often want to use macros that permute their arguments. e.g. there's no ; I often want to use macros that permute their arguments. e.g. there's no
@ -645,3 +673,222 @@ INIT_MMX
sub %1, %2 sub %1, %2
%endif %endif
%endmacro %endmacro
;=============================================================================
; AVX abstraction layer
;=============================================================================
%assign i 0
%rep 16
%if i < 8
CAT_XDEFINE sizeofmm, i, 8
%endif
CAT_XDEFINE sizeofxmm, i, 16
CAT_XDEFINE sizeofymm, i, 32
%assign i i+1
%endrep
%undef i
;%1 == instruction
;%2 == 1 if float, 0 if int
;%3 == 0 if 3-operand (xmm, xmm, xmm), 1 if 4-operand (xmm, xmm, xmm, imm)
;%4 == number of operands given
;%5+: operands
; RUN_AVX_INSTR: emit one instruction, either VEX-encoded or emulated with
; a register copy plus the legacy 2-operand form.
;   %1 instruction name      %2 float flag        %3 4-operand flag
;   %4 operand count given   %5 destination       %6 first source
;   %7 everything that follows (greedy: may hold "src2" or "src2, imm")
%macro RUN_AVX_INSTR 6-7+
; ymm destination: always emit the v-prefixed form
%if sizeof%5==32
v%1 %5, %6, %7
%else
; choose the register-copy instruction used for emulation:
; movq for 8-byte (mmx) registers, movaps for float, movdqa for integer
%if sizeof%5==8
%define %%regmov movq
%elif %2
%define %%regmov movaps
%else
%define %%regmov movdqa
%endif
; non-destructive form given (one operand more than the legacy form takes)
%if %4>=3+%3
%ifnidn %5, %6
%if avx_enabled && sizeof%5==16
v%1 %5, %6, %7
%else
; emulate: copy the first source into the destination, then operate in place
%%regmov %5, %6
%1 %5, %7
%endif
%else
; destination equals first source: the legacy form is already sufficient
%1 %5, %7
%endif
; legacy form with immediate (dst, src, imm): pass through unchanged
%elif %3
%1 %5, %6, %7
; legacy 2-operand form: pass through unchanged
%else
%1 %5, %6
%endif
%endif
%endmacro
;%1 == instruction
;%2 == 1 if float, 0 if int
;%3 == 0 if 3-operand (xmm, xmm, xmm), 1 if 4-operand (xmm, xmm, xmm, imm)
; AVX_INSTR: define a macro named after instruction %1 that accepts 2 to 5
; operands and routes them through RUN_AVX_INSTR.
; The generated macro pads missing operands with the placeholder "fnord" and
; carries the three AVX_INSTR arguments (name, float flag, 4-operand flag) as
; its trailing defaults %6-%8; the operand count handed to RUN_AVX_INSTR is
; found by checking which operand still equals "fnord".
%macro AVX_INSTR 3
%macro %1 2-8 fnord, fnord, fnord, %1, %2, %3
%ifidn %3, fnord
RUN_AVX_INSTR %6, %7, %8, 2, %1, %2
%elifidn %4, fnord
RUN_AVX_INSTR %6, %7, %8, 3, %1, %2, %3
%elifidn %5, fnord
RUN_AVX_INSTR %6, %7, %8, 4, %1, %2, %3, %4
%else
RUN_AVX_INSTR %6, %7, %8, 5, %1, %2, %3, %4, %5
%endif
%endmacro
%endmacro
AVX_INSTR addpd, 1, 0
AVX_INSTR addps, 1, 0
AVX_INSTR addsd, 1, 0
AVX_INSTR addss, 1, 0
AVX_INSTR addsubpd, 1, 0
AVX_INSTR addsubps, 1, 0
AVX_INSTR andpd, 1, 0
AVX_INSTR andps, 1, 0
AVX_INSTR andnpd, 1, 0
AVX_INSTR andnps, 1, 0
; blendp*/cmp*/dpp* take a trailing immediate, so they must be declared with
; the 4-operand flag set. With the flag clear, the legacy form
; "op dst, src, imm" is mistaken for a non-destructive "op dst, src1, src2"
; and emulated as "mov dst, src" + "op dst, imm", which is not a valid
; encoding. The 4-operand form keeps working with the flag set.
AVX_INSTR blendpd, 1, 1
AVX_INSTR blendps, 1, 1
AVX_INSTR blendvpd, 1, 0
AVX_INSTR blendvps, 1, 0
AVX_INSTR cmppd, 1, 1
AVX_INSTR cmpps, 1, 1
AVX_INSTR cmpsd, 1, 1
AVX_INSTR cmpss, 1, 1
AVX_INSTR divpd, 1, 0
AVX_INSTR divps, 1, 0
AVX_INSTR divsd, 1, 0
AVX_INSTR divss, 1, 0
AVX_INSTR dppd, 1, 1
AVX_INSTR dpps, 1, 1
AVX_INSTR haddpd, 1, 0
AVX_INSTR haddps, 1, 0
AVX_INSTR hsubpd, 1, 0
AVX_INSTR hsubps, 1, 0
AVX_INSTR maxpd, 1, 0
AVX_INSTR maxps, 1, 0
AVX_INSTR maxsd, 1, 0
AVX_INSTR maxss, 1, 0
AVX_INSTR minpd, 1, 0
AVX_INSTR minps, 1, 0
AVX_INSTR minsd, 1, 0
AVX_INSTR minss, 1, 0
AVX_INSTR mpsadbw, 0, 1
AVX_INSTR mulpd, 1, 0
AVX_INSTR mulps, 1, 0
AVX_INSTR mulsd, 1, 0
AVX_INSTR mulss, 1, 0
AVX_INSTR orpd, 1, 0
AVX_INSTR orps, 1, 0
AVX_INSTR packsswb, 0, 0
AVX_INSTR packssdw, 0, 0
AVX_INSTR packuswb, 0, 0
AVX_INSTR packusdw, 0, 0
AVX_INSTR paddb, 0, 0
AVX_INSTR paddw, 0, 0
AVX_INSTR paddd, 0, 0
AVX_INSTR paddq, 0, 0
AVX_INSTR paddsb, 0, 0
AVX_INSTR paddsw, 0, 0
AVX_INSTR paddusb, 0, 0
AVX_INSTR paddusw, 0, 0
AVX_INSTR palignr, 0, 1
AVX_INSTR pand, 0, 0
AVX_INSTR pandn, 0, 0
AVX_INSTR pavgb, 0, 0
AVX_INSTR pavgw, 0, 0
AVX_INSTR pblendvb, 0, 0
AVX_INSTR pblendw, 0, 1
AVX_INSTR pcmpestri, 0, 0
AVX_INSTR pcmpestrm, 0, 0
AVX_INSTR pcmpistri, 0, 0
AVX_INSTR pcmpistrm, 0, 0
AVX_INSTR pcmpeqb, 0, 0
AVX_INSTR pcmpeqw, 0, 0
AVX_INSTR pcmpeqd, 0, 0
AVX_INSTR pcmpeqq, 0, 0
AVX_INSTR pcmpgtb, 0, 0
AVX_INSTR pcmpgtw, 0, 0
AVX_INSTR pcmpgtd, 0, 0
AVX_INSTR pcmpgtq, 0, 0
AVX_INSTR phaddw, 0, 0
AVX_INSTR phaddd, 0, 0
AVX_INSTR phaddsw, 0, 0
AVX_INSTR phsubw, 0, 0
AVX_INSTR phsubd, 0, 0
AVX_INSTR phsubsw, 0, 0
AVX_INSTR pmaddwd, 0, 0
AVX_INSTR pmaddubsw, 0, 0
AVX_INSTR pmaxsb, 0, 0
AVX_INSTR pmaxsw, 0, 0
AVX_INSTR pmaxsd, 0, 0
AVX_INSTR pmaxub, 0, 0
AVX_INSTR pmaxuw, 0, 0
AVX_INSTR pmaxud, 0, 0
AVX_INSTR pminsb, 0, 0
AVX_INSTR pminsw, 0, 0
AVX_INSTR pminsd, 0, 0
AVX_INSTR pminub, 0, 0
AVX_INSTR pminuw, 0, 0
AVX_INSTR pminud, 0, 0
AVX_INSTR pmulhuw, 0, 0
AVX_INSTR pmulhrsw, 0, 0
AVX_INSTR pmulhw, 0, 0
AVX_INSTR pmullw, 0, 0
AVX_INSTR pmulld, 0, 0
AVX_INSTR pmuludq, 0, 0
AVX_INSTR pmuldq, 0, 0
AVX_INSTR por, 0, 0
AVX_INSTR psadbw, 0, 0
AVX_INSTR pshufb, 0, 0
AVX_INSTR psignb, 0, 0
AVX_INSTR psignw, 0, 0
AVX_INSTR psignd, 0, 0
AVX_INSTR psllw, 0, 0
AVX_INSTR pslld, 0, 0
AVX_INSTR psllq, 0, 0
AVX_INSTR pslldq, 0, 0
AVX_INSTR psraw, 0, 0
AVX_INSTR psrad, 0, 0
AVX_INSTR psrlw, 0, 0
AVX_INSTR psrld, 0, 0
AVX_INSTR psrlq, 0, 0
AVX_INSTR psrldq, 0, 0
AVX_INSTR psubb, 0, 0
AVX_INSTR psubw, 0, 0
AVX_INSTR psubd, 0, 0
AVX_INSTR psubq, 0, 0
AVX_INSTR psubsb, 0, 0
AVX_INSTR psubsw, 0, 0
AVX_INSTR psubusb, 0, 0
AVX_INSTR psubusw, 0, 0
AVX_INSTR punpckhbw, 0, 0
AVX_INSTR punpckhwd, 0, 0
AVX_INSTR punpckhdq, 0, 0
AVX_INSTR punpckhqdq, 0, 0
AVX_INSTR punpcklbw, 0, 0
AVX_INSTR punpcklwd, 0, 0
AVX_INSTR punpckldq, 0, 0
AVX_INSTR punpcklqdq, 0, 0
AVX_INSTR pxor, 0, 0
; shufps is a float (SSE1) instruction: flag it as float so that emulation
; copies registers with movaps instead of movdqa, which requires SSE2
AVX_INSTR shufps, 1, 1
AVX_INSTR subpd, 1, 0
AVX_INSTR subps, 1, 0
AVX_INSTR subsd, 1, 0
AVX_INSTR subss, 1, 0
AVX_INSTR unpckhpd, 1, 0
AVX_INSTR unpckhps, 1, 0
AVX_INSTR unpcklpd, 1, 0
AVX_INSTR unpcklps, 1, 0
AVX_INSTR xorpd, 1, 0
AVX_INSTR xorps, 1, 0
; 3DNow instructions, for sharing code between AVX, SSE and 3DN
AVX_INSTR pfadd, 1, 0
AVX_INSTR pfsub, 1, 0
AVX_INSTR pfmul, 1, 0

View File

@ -26,8 +26,8 @@
#include "libavutil/samplefmt.h" #include "libavutil/samplefmt.h"
#define LIBAVFILTER_VERSION_MAJOR 2 #define LIBAVFILTER_VERSION_MAJOR 2
#define LIBAVFILTER_VERSION_MINOR 0 #define LIBAVFILTER_VERSION_MINOR 3
#define LIBAVFILTER_VERSION_MICRO 0 #define LIBAVFILTER_VERSION_MICRO 1
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
LIBAVFILTER_VERSION_MINOR, \ LIBAVFILTER_VERSION_MINOR, \
@ -115,7 +115,7 @@ typedef struct AVFilterBufferRefVideoProps {
AVRational pixel_aspect; ///< pixel aspect ratio AVRational pixel_aspect; ///< pixel aspect ratio
int interlaced; ///< is frame interlaced int interlaced; ///< is frame interlaced
int top_field_first; ///< field order int top_field_first; ///< field order
int pict_type; ///< Picture type of the frame enum AVPictureType pict_type; ///< picture type of the frame
int key_frame; ///< 1 -> keyframe, 0-> not int key_frame; ///< 1 -> keyframe, 0-> not
} AVFilterBufferRefVideoProps; } AVFilterBufferRefVideoProps;

View File

@ -75,8 +75,8 @@ typedef struct {
int input_is_pal; ///< set to 1 if the input format is paletted int input_is_pal; ///< set to 1 if the input format is paletted
int interlaced; int interlaced;
char w_expr[256]; ///< width expression string char w_expr[256]; ///< width expression string
char h_expr[256]; ///< height expression string char h_expr[256]; ///< height expression string
} ScaleContext; } ScaleContext;
static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque) static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)

View File

@ -1,7 +1,6 @@
/* /*
* Copyright (C) 2006-2010 Michael Niedermayer <michaelni@gmx.at> * Copyright (C) 2006-2010 Michael Niedermayer <michaelni@gmx.at>
* 2010 James Darnley <james.darnley@gmail.com> * 2010 James Darnley <james.darnley@gmail.com>
* This file is part of FFmpeg.
* *
* FFmpeg is free software; you can redistribute it and/or modify * FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by

View File

@ -131,7 +131,7 @@ static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
(n = sscanf(args, "%d:%d:%127[^:]:%d:%d:%d:%d", &c->w, &c->h, pix_fmt_str, (n = sscanf(args, "%d:%d:%127[^:]:%d:%d:%d:%d", &c->w, &c->h, pix_fmt_str,
&c->time_base.num, &c->time_base.den, &c->time_base.num, &c->time_base.den,
&c->pixel_aspect.num, &c->pixel_aspect.den)) != 7) { &c->pixel_aspect.num, &c->pixel_aspect.den)) != 7) {
av_log(ctx, AV_LOG_ERROR, "Expected 7 arguments, but %d found in '%s'\n", n, args); av_log(ctx, AV_LOG_ERROR, "Expected 7 arguments, but only %d found in '%s'\n", n, args);
return AVERROR(EINVAL); return AVERROR(EINVAL);
} }
if ((c->pix_fmt = av_get_pix_fmt(pix_fmt_str)) == PIX_FMT_NONE) { if ((c->pix_fmt = av_get_pix_fmt(pix_fmt_str)) == PIX_FMT_NONE) {

View File

@ -175,7 +175,6 @@ static void build_frame_code(AVFormatContext *s){
} }
key_frame= intra_only; key_frame= intra_only;
#if 1
if(is_audio){ if(is_audio){
int frame_bytes= codec->frame_size*(int64_t)codec->bit_rate / (8*codec->sample_rate); int frame_bytes= codec->frame_size*(int64_t)codec->bit_rate / (8*codec->sample_rate);
int pts; int pts;
@ -199,7 +198,6 @@ static void build_frame_code(AVFormatContext *s){
ft->pts_delta=1; ft->pts_delta=1;
start2++; start2++;
} }
#endif
if(codec->has_b_frames){ if(codec->has_b_frames){
pred_count=5; pred_count=5;

View File

@ -40,7 +40,7 @@
#define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c) #define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
#define LIBAVUTIL_VERSION_MAJOR 51 #define LIBAVUTIL_VERSION_MAJOR 51
#define LIBAVUTIL_VERSION_MINOR 0 #define LIBAVUTIL_VERSION_MINOR 1
#define LIBAVUTIL_VERSION_MICRO 0 #define LIBAVUTIL_VERSION_MICRO 0
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
@ -97,6 +97,25 @@ enum AVMediaType {
#define AV_TIME_BASE 1000000 #define AV_TIME_BASE 1000000
#define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE} #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE}
/**
 * Picture coding types. Numbering starts at 1 and is consecutive.
 */
enum AVPictureType {
    AV_PICTURE_TYPE_I  = 1, ///< Intra
    AV_PICTURE_TYPE_P  = 2, ///< Predicted
    AV_PICTURE_TYPE_B  = 3, ///< Bi-dir predicted
    AV_PICTURE_TYPE_S  = 4, ///< S(GMC)-VOP MPEG4
    AV_PICTURE_TYPE_SI = 5, ///< Switching Intra
    AV_PICTURE_TYPE_SP = 6, ///< Switching Predicted
    AV_PICTURE_TYPE_BI = 7, ///< BI type
};
/**
 * Return a single letter to describe the given picture type
 * pict_type.
 *
 * @param[in] pict_type the picture type
 * @return a single character representing the picture type, '?' if
 *         pict_type is unknown
 */
char av_get_picture_type_char(enum AVPictureType pict_type);
#include "common.h" #include "common.h"
#include "error.h" #include "error.h"
#include "mathematics.h" #include "mathematics.h"

View File

@ -69,21 +69,21 @@ void *av_malloc(size_t size)
#endif #endif
/* let's disallow possible ambiguous cases */ /* let's disallow possible ambiguous cases */
if(size > (INT_MAX-16) ) if(size > (INT_MAX-32) )
return NULL; return NULL;
#if CONFIG_MEMALIGN_HACK #if CONFIG_MEMALIGN_HACK
ptr = malloc(size+16); ptr = malloc(size+32);
if(!ptr) if(!ptr)
return ptr; return ptr;
diff= ((-(long)ptr - 1)&15) + 1; diff= ((-(long)ptr - 1)&31) + 1;
ptr = (char*)ptr + diff; ptr = (char*)ptr + diff;
((char*)ptr)[-1]= diff; ((char*)ptr)[-1]= diff;
#elif HAVE_POSIX_MEMALIGN #elif HAVE_POSIX_MEMALIGN
if (posix_memalign(&ptr,16,size)) if (posix_memalign(&ptr,32,size))
ptr = NULL; ptr = NULL;
#elif HAVE_MEMALIGN #elif HAVE_MEMALIGN
ptr = memalign(16,size); ptr = memalign(32,size);
/* Why 64? /* Why 64?
Indeed, we should align it: Indeed, we should align it:
on 4 for 386 on 4 for 386
@ -93,10 +93,8 @@ void *av_malloc(size_t size)
Because L1 and L2 caches are aligned on those values. Because L1 and L2 caches are aligned on those values.
But I don't want to code such logic here! But I don't want to code such logic here!
*/ */
/* Why 16? /* Why 32?
Because some CPUs need alignment, for example SSE2 on P4, & most RISC CPUs For AVX ASM. SSE / NEON needs only 16.
it will just trigger an exception and the unaligned load will be done in the
exception handler or it will just segfault (SSE2 on P4).
Why not larger? Because I did not see a difference in benchmarks ... Why not larger? Because I did not see a difference in benchmarks ...
*/ */
/* benchmarks with P3 /* benchmarks with P3

View File

@ -218,7 +218,6 @@ int main(void){
printf("\n"); printf("\n");
} }
#if 1
for(i=0; i<LEN; i++){ for(i=0; i<LEN; i++){
double v[LEN]; double v[LEN];
double error=0; double error=0;
@ -233,7 +232,7 @@ int main(void){
printf("%f ", error); printf("%f ", error);
} }
printf("\n"); printf("\n");
#endif
for(i=0; i<LEN; i++){ for(i=0; i<LEN; i++){
for(j=0; j<LEN; j++){ for(j=0; j<LEN; j++){
printf("%9.6f ", eigenvector[i + j*LEN]); printf("%9.6f ", eigenvector[i + j*LEN]);

View File

@ -39,3 +39,17 @@ const char *avutil_license(void)
#define LICENSE_PREFIX "libavutil license: " #define LICENSE_PREFIX "libavutil license: "
return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1; return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
} }
char av_get_picture_type_char(enum AVPictureType pict_type)
{
switch (pict_type) {
case AV_PICTURE_TYPE_I: return 'I';
case AV_PICTURE_TYPE_P: return 'P';
case AV_PICTURE_TYPE_B: return 'B';
case AV_PICTURE_TYPE_S: return 'S';
case AV_PICTURE_TYPE_SI: return 'i';
case AV_PICTURE_TYPE_SP: return 'p';
case AV_PICTURE_TYPE_BI: return 'b';
default: return '?';
}
}

View File

@ -1,2 +1,2 @@
5ddb6d25dd117db29627f9d286153a7a *./tests/data/acodec/ac3.rm 0f14801e166819dd4a58981aea36e08b *./tests/data/acodec/ac3.rm
98751 ./tests/data/acodec/ac3.rm 98751 ./tests/data/acodec/ac3.rm

View File

@ -1,2 +1,2 @@
a1c71456f21d5459d2824d75bbdcc80c *./tests/data/lavf/lavf.rm 2e3d6b1944c6cd2cf14e13055aecf82a *./tests/data/lavf/lavf.rm
346706 ./tests/data/lavf/lavf.rm 346706 ./tests/data/lavf/lavf.rm

View File

@ -11,7 +11,8 @@ ret:-1 st:-1 flags:1 ts: 1.470835
ret:-1 st: 0 flags:0 ts: 0.365000 ret:-1 st: 0 flags:0 ts: 0.365000
ret: 0 st: 0 flags:1 ts:-0.741000 ret: 0 st: 0 flags:1 ts:-0.741000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st:-1 flags:0 ts: 2.153336 ret: 0 st:-1 flags:0 ts: 2.153336
ret: 0 st: 0 flags:1 dts: 2.159000 pts: 2.159000 pos: 35567 size: 556
ret:-1 st:-1 flags:1 ts: 1.047503 ret:-1 st:-1 flags:1 ts: 1.047503
ret: 0 st: 0 flags:0 ts:-0.058000 ret: 0 st: 0 flags:0 ts:-0.058000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556