Bug 1582271 - pt2 - add ffvpx mp3 decoding for linux/macOS/win. r=jya

Differential Revision: https://phabricator.services.mozilla.com/D46424

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Michael Froman 2019-09-24 21:18:24 +00:00
parent 36adeaea97
commit e290922048
71 changed files with 13016 additions and 52 deletions

View File

@ -1,7 +1,7 @@
/* Automatically generated by configure - do not modify! */
#ifndef FFMPEG_CONFIG_H
#define FFMPEG_CONFIG_H
#define FFMPEG_CONFIGURATION "--disable-everything --disable-protocols --disable-demuxers --disable-muxers --disable-filters --disable-programs --disable-doc --disable-parsers --disable-static --enable-shared --disable-debug --disable-sdl2 --disable-libxcb --disable-securetransport --disable-iconv --disable-swresample --disable-swscale --disable-avdevice --disable-avfilter --disable-avformat --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vdpau --disable-videotoolbox --enable-decoder=flac --disable-crystalhd --disable-asm --disable-cuda --disable-cuvid"
#define FFMPEG_CONFIGURATION "--disable-everything --disable-protocols --disable-demuxers --disable-muxers --disable-filters --disable-programs --disable-doc --disable-parsers --disable-static --enable-shared --disable-debug --disable-sdl2 --disable-libxcb --disable-securetransport --disable-iconv --disable-swresample --disable-swscale --disable-avdevice --disable-avfilter --disable-avformat --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vdpau --disable-videotoolbox --enable-decoder=flac --disable-crystalhd --disable-asm --disable-cuda --disable-cuvid --enable-decoder=mp3"
#define FFMPEG_LICENSE "LGPL version 2.1 or later"
#define CONFIG_THIS_YEAR 2018
#define FFMPEG_DATADIR "/usr/local/share/ffmpeg"
@ -539,7 +539,7 @@
#define CONFIG_FFPLAY 0
#define CONFIG_FFPROBE 0
#define CONFIG_FFMPEG 0
#define CONFIG_DCT 0
#define CONFIG_DCT 1
#define CONFIG_DWT 0
#define CONFIG_ERROR_RESILIENCE 0
#define CONFIG_FAAN 1
@ -613,9 +613,9 @@
#define CONFIG_LZF 0
#define CONFIG_ME_CMP 0
#define CONFIG_MPEG_ER 0
#define CONFIG_MPEGAUDIO 0
#define CONFIG_MPEGAUDIODSP 0
#define CONFIG_MPEGAUDIOHEADER 0
#define CONFIG_MPEGAUDIO 1
#define CONFIG_MPEGAUDIODSP 1
#define CONFIG_MPEGAUDIOHEADER 1
#define CONFIG_MPEGVIDEO 0
#define CONFIG_MPEGVIDEOENC 0
#define CONFIG_MSS34DSP 0
@ -654,4 +654,5 @@
#define CONFIG_FLAC_DECODER 1
#define CONFIG_VP8_PARSER 0
#define CONFIG_VP9_PARSER 0
#define CONFIG_MP3_DECODER 1
#endif /* FFMPEG_CONFIG_H */

View File

@ -524,7 +524,7 @@
%define CONFIG_FFPLAY 0
%define CONFIG_FFPROBE 0
%define CONFIG_FFMPEG 0
%define CONFIG_DCT 0
%define CONFIG_DCT 1
%define CONFIG_DWT 0
%define CONFIG_ERROR_RESILIENCE 0
%define CONFIG_FAAN 1
@ -599,9 +599,9 @@
%define CONFIG_LZF 0
%define CONFIG_ME_CMP 0
%define CONFIG_MPEG_ER 0
%define CONFIG_MPEGAUDIO 0
%define CONFIG_MPEGAUDIODSP 0
%define CONFIG_MPEGAUDIOHEADER 0
%define CONFIG_MPEGAUDIO 1
%define CONFIG_MPEGAUDIODSP 1
%define CONFIG_MPEGAUDIOHEADER 1
%define CONFIG_MPEGVIDEO 0
%define CONFIG_MPEGVIDEOENC 0
%define CONFIG_MSS34DSP 0
@ -640,3 +640,4 @@
%define CONFIG_FLAC_DECODER 1
%define CONFIG_VP8_PARSER 1
%define CONFIG_VP9_PARSER 1
%define CONFIG_MP3_DECODER 1

View File

@ -1,7 +1,7 @@
/* Automatically generated by configure - do not modify! */
#ifndef FFMPEG_CONFIG_H
#define FFMPEG_CONFIG_H
#define FFMPEG_CONFIGURATION "--disable-everything --disable-protocols --disable-demuxers --disable-muxers --disable-filters --disable-programs --disable-doc --disable-parsers --enable-parser=vp8 --enable-parser=vp9 --enable-decoder=vp8 --enable-decoder=vp9 --disable-static --enable-shared --disable-debug --disable-sdl2 --disable-libxcb --disable-securetransport --disable-iconv --disable-swresample --disable-swscale --disable-avdevice --disable-avfilter --disable-avformat --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vdpau --disable-videotoolbox --enable-decoder=flac --enable-asm --enable-x86asm --disable-cuda --disable-cuvid"
#define FFMPEG_CONFIGURATION "--disable-everything --disable-protocols --disable-demuxers --disable-muxers --disable-filters --disable-programs --disable-doc --disable-parsers --enable-parser=vp8 --enable-parser=vp9 --enable-decoder=vp8 --enable-decoder=vp9 --disable-static --enable-shared --disable-debug --disable-sdl2 --disable-libxcb --disable-securetransport --disable-iconv --disable-swresample --disable-swscale --disable-avdevice --disable-avfilter --disable-avformat --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vdpau --disable-videotoolbox --enable-decoder=flac --enable-asm --enable-x86asm --disable-cuda --disable-cuvid --enable-decoder=mp3"
#define FFMPEG_LICENSE "LGPL version 2.1 or later"
#define CONFIG_THIS_YEAR 2018
#define FFMPEG_DATADIR "/usr/local/share/ffmpeg"
@ -539,7 +539,7 @@
#define CONFIG_FFPLAY 0
#define CONFIG_FFPROBE 0
#define CONFIG_FFMPEG 0
#define CONFIG_DCT 0
#define CONFIG_DCT 1
#define CONFIG_DWT 0
#define CONFIG_ERROR_RESILIENCE 0
#define CONFIG_FAAN 1
@ -614,9 +614,9 @@
#define CONFIG_LZF 0
#define CONFIG_ME_CMP 0
#define CONFIG_MPEG_ER 0
#define CONFIG_MPEGAUDIO 0
#define CONFIG_MPEGAUDIODSP 0
#define CONFIG_MPEGAUDIOHEADER 0
#define CONFIG_MPEGAUDIO 1
#define CONFIG_MPEGAUDIODSP 1
#define CONFIG_MPEGAUDIOHEADER 1
#define CONFIG_MPEGVIDEO 0
#define CONFIG_MPEGVIDEOENC 0
#define CONFIG_MSS34DSP 0
@ -655,4 +655,5 @@
#define CONFIG_FLAC_DECODER 1
#define CONFIG_VP8_PARSER 1
#define CONFIG_VP9_PARSER 1
#define CONFIG_MP3_DECODER 1
#endif /* FFMPEG_CONFIG_H */

View File

@ -1,7 +1,7 @@
/* Automatically generated by configure - do not modify! */
#ifndef FFMPEG_CONFIG_H
#define FFMPEG_CONFIG_H
#define FFMPEG_CONFIGURATION "--disable-everything --disable-protocols --disable-demuxers --disable-muxers --disable-filters --disable-programs --disable-doc --disable-parsers --enable-parser=vp8 --enable-parser=vp9 --enable-decoder=vp8 --enable-decoder=vp9 --disable-static --enable-shared --disable-debug --disable-sdl2 --disable-libxcb --disable-securetransport --disable-iconv --disable-swresample --disable-swscale --disable-avdevice --disable-avfilter --disable-avformat --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vdpau --disable-videotoolbox --enable-decoder=flac --disable-asm --disable-x86asm --disable-cuda --disable-cuvid"
#define FFMPEG_CONFIGURATION "--disable-everything --disable-protocols --disable-demuxers --disable-muxers --disable-filters --disable-programs --disable-doc --disable-parsers --enable-parser=vp8 --enable-parser=vp9 --enable-decoder=vp8 --enable-decoder=vp9 --disable-static --enable-shared --disable-debug --disable-sdl2 --disable-libxcb --disable-securetransport --disable-iconv --disable-swresample --disable-swscale --disable-avdevice --disable-avfilter --disable-avformat --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vdpau --disable-videotoolbox --enable-decoder=flac --disable-asm --disable-x86asm --disable-cuda --disable-cuvid --enable-decoder=mp3"
#define FFMPEG_LICENSE "LGPL version 2.1 or later"
#define CONFIG_THIS_YEAR 2018
#define FFMPEG_DATADIR "/usr/local/share/ffmpeg"
@ -539,7 +539,7 @@
#define CONFIG_FFPLAY 0
#define CONFIG_FFPROBE 0
#define CONFIG_FFMPEG 0
#define CONFIG_DCT 0
#define CONFIG_DCT 1
#define CONFIG_DWT 0
#define CONFIG_ERROR_RESILIENCE 0
#define CONFIG_FAAN 1
@ -614,9 +614,9 @@
#define CONFIG_LZF 0
#define CONFIG_ME_CMP 0
#define CONFIG_MPEG_ER 0
#define CONFIG_MPEGAUDIO 0
#define CONFIG_MPEGAUDIODSP 0
#define CONFIG_MPEGAUDIOHEADER 0
#define CONFIG_MPEGAUDIO 1
#define CONFIG_MPEGAUDIODSP 1
#define CONFIG_MPEGAUDIOHEADER 1
#define CONFIG_MPEGVIDEO 0
#define CONFIG_MPEGVIDEOENC 0
#define CONFIG_MSS34DSP 0
@ -655,4 +655,5 @@
#define CONFIG_FLAC_DECODER 1
#define CONFIG_VP8_PARSER 1
#define CONFIG_VP9_PARSER 1
#define CONFIG_MP3_DECODER 1
#endif /* FFMPEG_CONFIG_H */

View File

@ -524,7 +524,7 @@
%define CONFIG_FFPLAY 0
%define CONFIG_FFPROBE 0
%define CONFIG_FFMPEG 0
%define CONFIG_DCT 0
%define CONFIG_DCT 1
%define CONFIG_DWT 0
%define CONFIG_ERROR_RESILIENCE 0
%define CONFIG_FAAN 1
@ -599,9 +599,9 @@
%define CONFIG_LZF 0
%define CONFIG_ME_CMP 0
%define CONFIG_MPEG_ER 0
%define CONFIG_MPEGAUDIO 0
%define CONFIG_MPEGAUDIODSP 0
%define CONFIG_MPEGAUDIOHEADER 0
%define CONFIG_MPEGAUDIO 1
%define CONFIG_MPEGAUDIODSP 1
%define CONFIG_MPEGAUDIOHEADER 1
%define CONFIG_MPEGVIDEO 0
%define CONFIG_MPEGVIDEOENC 0
%define CONFIG_MSS34DSP 0
@ -640,3 +640,4 @@
%define CONFIG_FLAC_DECODER 1
%define CONFIG_VP8_PARSER 1
%define CONFIG_VP9_PARSER 1
%define CONFIG_MP3_DECODER 1

View File

@ -1,7 +1,7 @@
/* Automatically generated by configure - do not modify! */
#ifndef FFMPEG_CONFIG_H
#define FFMPEG_CONFIG_H
#define FFMPEG_CONFIGURATION "--disable-everything --disable-protocols --disable-demuxers --disable-muxers --disable-filters --disable-programs --disable-doc --disable-parsers --enable-parser=vp8 --enable-parser=vp9 --enable-decoder=vp8 --enable-decoder=vp9 --disable-static --enable-shared --disable-debug --disable-sdl2 --disable-libxcb --disable-securetransport --disable-iconv --disable-swresample --disable-swscale --disable-avdevice --disable-avfilter --disable-avformat --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vdpau --disable-videotoolbox --enable-decoder=flac --enable-asm --enable-x86asm --disable-cuda --disable-cuvid"
#define FFMPEG_CONFIGURATION "--disable-everything --disable-protocols --disable-demuxers --disable-muxers --disable-filters --disable-programs --disable-doc --disable-parsers --enable-parser=vp8 --enable-parser=vp9 --enable-decoder=vp8 --enable-decoder=vp9 --disable-static --enable-shared --disable-debug --disable-sdl2 --disable-libxcb --disable-securetransport --disable-iconv --disable-swresample --disable-swscale --disable-avdevice --disable-avfilter --disable-avformat --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vdpau --disable-videotoolbox --enable-decoder=flac --enable-asm --enable-x86asm --disable-cuda --disable-cuvid --enable-decoder=mp3"
#define FFMPEG_LICENSE "LGPL version 2.1 or later"
#define CONFIG_THIS_YEAR 2018
#define FFMPEG_DATADIR "/usr/local/share/ffmpeg"
@ -539,7 +539,7 @@
#define CONFIG_FFPLAY 0
#define CONFIG_FFPROBE 0
#define CONFIG_FFMPEG 0
#define CONFIG_DCT 0
#define CONFIG_DCT 1
#define CONFIG_DWT 0
#define CONFIG_ERROR_RESILIENCE 0
#define CONFIG_FAAN 1
@ -614,9 +614,9 @@
#define CONFIG_LZF 0
#define CONFIG_ME_CMP 0
#define CONFIG_MPEG_ER 0
#define CONFIG_MPEGAUDIO 0
#define CONFIG_MPEGAUDIODSP 0
#define CONFIG_MPEGAUDIOHEADER 0
#define CONFIG_MPEGAUDIO 1
#define CONFIG_MPEGAUDIODSP 1
#define CONFIG_MPEGAUDIOHEADER 1
#define CONFIG_MPEGVIDEO 0
#define CONFIG_MPEGVIDEOENC 0
#define CONFIG_MSS34DSP 0
@ -655,4 +655,5 @@
#define CONFIG_FLAC_DECODER 1
#define CONFIG_VP8_PARSER 1
#define CONFIG_VP9_PARSER 1
#define CONFIG_MP3_DECODER 1
#endif /* FFMPEG_CONFIG_H */

View File

@ -529,7 +529,7 @@
%define CONFIG_FFPLAY 0
%define CONFIG_FFPROBE 0
%define CONFIG_FFMPEG 0
%define CONFIG_DCT 0
%define CONFIG_DCT 1
%define CONFIG_DWT 0
%define CONFIG_ERROR_RESILIENCE 0
%define CONFIG_FAAN 1
@ -606,9 +606,9 @@
%define CONFIG_LZF 0
%define CONFIG_ME_CMP 0
%define CONFIG_MPEG_ER 0
%define CONFIG_MPEGAUDIO 0
%define CONFIG_MPEGAUDIODSP 0
%define CONFIG_MPEGAUDIOHEADER 0
%define CONFIG_MPEGAUDIO 1
%define CONFIG_MPEGAUDIODSP 1
%define CONFIG_MPEGAUDIOHEADER 1
%define CONFIG_MPEGVIDEO 0
%define CONFIG_MPEGVIDEOENC 0
%define CONFIG_MSS34DSP 0
@ -647,3 +647,4 @@
%define CONFIG_FLAC_DECODER 1
%define CONFIG_VP8_PARSER 1
%define CONFIG_VP9_PARSER 1
%define CONFIG_MP3_DECODER 1

View File

@ -1,7 +1,7 @@
/* Automatically generated by configure - do not modify! */
#ifndef FFMPEG_CONFIG_H
#define FFMPEG_CONFIG_H
#define FFMPEG_CONFIGURATION "--disable-everything --disable-protocols --disable-demuxers --disable-muxers --disable-filters --disable-programs --disable-doc --disable-parsers --enable-parser=vp8 --enable-parser=vp9 --enable-decoder=vp8 --enable-decoder=vp9 --disable-static --enable-shared --disable-debug --disable-sdl2 --disable-libxcb --disable-securetransport --disable-iconv --disable-swresample --disable-swscale --disable-avdevice --disable-avfilter --disable-avformat --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vdpau --disable-videotoolbox --enable-decoder=flac --enable-asm --enable-x86asm --disable-cuda --disable-cuvid --toolchain=msvc"
#define FFMPEG_CONFIGURATION "--disable-everything --disable-protocols --disable-demuxers --disable-muxers --disable-filters --disable-programs --disable-doc --disable-parsers --enable-parser=vp8 --enable-parser=vp9 --enable-decoder=vp8 --enable-decoder=vp9 --disable-static --enable-shared --disable-debug --disable-sdl2 --disable-libxcb --disable-securetransport --disable-iconv --disable-swresample --disable-swscale --disable-avdevice --disable-avfilter --disable-avformat --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vdpau --disable-videotoolbox --enable-decoder=flac --enable-asm --enable-x86asm --disable-cuda --disable-cuvid --enable-decoder=mp3 --toolchain=msvc"
#define FFMPEG_LICENSE "LGPL version 2.1 or later"
#define CONFIG_THIS_YEAR 2018
#define FFMPEG_DATADIR "/usr/local/share/ffmpeg"
@ -544,7 +544,7 @@
#define CONFIG_FFPLAY 0
#define CONFIG_FFPROBE 0
#define CONFIG_FFMPEG 0
#define CONFIG_DCT 0
#define CONFIG_DCT 1
#define CONFIG_DWT 0
#define CONFIG_ERROR_RESILIENCE 0
#define CONFIG_FAAN 1
@ -621,9 +621,9 @@
#define CONFIG_LZF 0
#define CONFIG_ME_CMP 0
#define CONFIG_MPEG_ER 0
#define CONFIG_MPEGAUDIO 0
#define CONFIG_MPEGAUDIODSP 0
#define CONFIG_MPEGAUDIOHEADER 0
#define CONFIG_MPEGAUDIO 1
#define CONFIG_MPEGAUDIODSP 1
#define CONFIG_MPEGAUDIOHEADER 1
#define CONFIG_MPEGVIDEO 0
#define CONFIG_MPEGVIDEOENC 0
#define CONFIG_MSS34DSP 0
@ -662,4 +662,5 @@
#define CONFIG_FLAC_DECODER 1
#define CONFIG_VP8_PARSER 1
#define CONFIG_VP9_PARSER 1
#define CONFIG_MP3_DECODER 1
#endif /* FFMPEG_CONFIG_H */

View File

@ -529,7 +529,7 @@
%define CONFIG_FFPLAY 0
%define CONFIG_FFPROBE 0
%define CONFIG_FFMPEG 0
%define CONFIG_DCT 0
%define CONFIG_DCT 1
%define CONFIG_DWT 0
%define CONFIG_ERROR_RESILIENCE 0
%define CONFIG_FAAN 1
@ -606,9 +606,9 @@
%define CONFIG_LZF 0
%define CONFIG_ME_CMP 0
%define CONFIG_MPEG_ER 0
%define CONFIG_MPEGAUDIO 0
%define CONFIG_MPEGAUDIODSP 0
%define CONFIG_MPEGAUDIOHEADER 0
%define CONFIG_MPEGAUDIO 1
%define CONFIG_MPEGAUDIODSP 1
%define CONFIG_MPEGAUDIOHEADER 1
%define CONFIG_MPEGVIDEO 0
%define CONFIG_MPEGVIDEOENC 0
%define CONFIG_MSS34DSP 0
@ -647,3 +647,4 @@
%define CONFIG_FLAC_DECODER 1
%define CONFIG_VP8_PARSER 1
%define CONFIG_VP9_PARSER 1
%define CONFIG_MP3_DECODER 1

View File

@ -1,7 +1,7 @@
/* Automatically generated by configure - do not modify! */
#ifndef FFMPEG_CONFIG_H
#define FFMPEG_CONFIG_H
#define FFMPEG_CONFIGURATION "--disable-everything --disable-protocols --disable-demuxers --disable-muxers --disable-filters --disable-programs --disable-doc --disable-parsers --enable-parser=vp8 --enable-parser=vp9 --enable-decoder=vp8 --enable-decoder=vp9 --disable-static --enable-shared --disable-debug --disable-sdl2 --disable-libxcb --disable-securetransport --disable-iconv --disable-swresample --disable-swscale --disable-avdevice --disable-avfilter --disable-avformat --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vdpau --disable-videotoolbox --enable-decoder=flac --enable-asm --enable-x86asm --toolchain=msvc --disable-cuda --disable-cuvid"
#define FFMPEG_CONFIGURATION "--disable-everything --disable-protocols --disable-demuxers --disable-muxers --disable-filters --disable-programs --disable-doc --disable-parsers --enable-parser=vp8 --enable-parser=vp9 --enable-decoder=vp8 --enable-decoder=vp9 --disable-static --enable-shared --disable-debug --disable-sdl2 --disable-libxcb --disable-securetransport --disable-iconv --disable-swresample --disable-swscale --disable-avdevice --disable-avfilter --disable-avformat --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vdpau --disable-videotoolbox --enable-decoder=flac --enable-asm --enable-x86asm --toolchain=msvc --disable-cuda --disable-cuvid --enable-decoder=mp3"
#define FFMPEG_LICENSE "LGPL version 2.1 or later"
#define CONFIG_THIS_YEAR 2018
#define FFMPEG_DATADIR "/usr/local/share/ffmpeg"
@ -544,7 +544,7 @@
#define CONFIG_FFPLAY 0
#define CONFIG_FFPROBE 0
#define CONFIG_FFMPEG 0
#define CONFIG_DCT 0
#define CONFIG_DCT 1
#define CONFIG_DWT 0
#define CONFIG_ERROR_RESILIENCE 0
#define CONFIG_FAAN 1
@ -621,9 +621,9 @@
#define CONFIG_LZF 0
#define CONFIG_ME_CMP 0
#define CONFIG_MPEG_ER 0
#define CONFIG_MPEGAUDIO 0
#define CONFIG_MPEGAUDIODSP 0
#define CONFIG_MPEGAUDIOHEADER 0
#define CONFIG_MPEGAUDIO 1
#define CONFIG_MPEGAUDIODSP 1
#define CONFIG_MPEGAUDIOHEADER 1
#define CONFIG_MPEGVIDEO 0
#define CONFIG_MPEGVIDEOENC 0
#define CONFIG_MSS34DSP 0
@ -662,4 +662,5 @@
#define CONFIG_FLAC_DECODER 1
#define CONFIG_VP8_PARSER 1
#define CONFIG_VP9_PARSER 1
#define CONFIG_MP3_DECODER 1
#endif /* FFMPEG_CONFIG_H */

View File

@ -922,7 +922,6 @@
%define CONFIG_MP3ADU_DECODER 0
%define CONFIG_MP3ADUFLOAT_DECODER 0
%define CONFIG_MP3_AT_DECODER 0
%define CONFIG_MP3_DECODER 0
%define CONFIG_MP3_DEMUXER 0
%define CONFIG_MP3FLOAT_DECODER 0
%define CONFIG_MP3_HEADER_DECOMPRESS_BSF 0

View File

@ -922,7 +922,6 @@
#define CONFIG_MP3ADU_DECODER 0
#define CONFIG_MP3ADUFLOAT_DECODER 0
#define CONFIG_MP3_AT_DECODER 0
#define CONFIG_MP3_DECODER 0
#define CONFIG_MP3_DEMUXER 0
#define CONFIG_MP3FLOAT_DECODER 0
#define CONFIG_MP3_HEADER_DECOMPRESS_BSF 0

View File

@ -0,0 +1,129 @@
/*
* Copyright (c) 2014 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "avcodec.h"
#include "idctdsp.h"
#include "fdctdsp.h"
#include "pixblockdsp.h"
#include "avdct.h"
#define OFFSET(x) offsetof(AVDCT,x)
#define DEFAULT 0 //should be NAN but it does not work as it is not a constant in glibc as required by ANSI/ISO C
//these names are too long to be readable
#define V AV_OPT_FLAG_VIDEO_PARAM
#define A AV_OPT_FLAG_AUDIO_PARAM
#define E AV_OPT_FLAG_ENCODING_PARAM
#define D AV_OPT_FLAG_DECODING_PARAM
/*
 * AVOption table for AVDCT.
 *
 * Three fields are exposed: "dct" (written to dct_algo), "idct" (written to
 * idct_algo) and "bits_per_sample". Rows of type AV_OPT_TYPE_CONST are named
 * values; their last column ("dct" or "idct") matches the option they belong
 * to. The list ends with the {NULL} sentinel row.
 */
static const AVOption avdct_options[] = {
/* Forward DCT selection and its named values. */
{"dct", "DCT algorithm", OFFSET(dct_algo), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|E, "dct"},
{"auto", "autoselect a good one", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_AUTO }, INT_MIN, INT_MAX, V|E, "dct"},
{"fastint", "fast integer (experimental / for debugging)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FASTINT }, INT_MIN, INT_MAX, V|E, "dct"},
{"int", "accurate integer", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_INT }, INT_MIN, INT_MAX, V|E, "dct"},
{"mmx", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_MMX }, INT_MIN, INT_MAX, V|E, "dct"},
{"altivec", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_ALTIVEC }, INT_MIN, INT_MAX, V|E, "dct"},
{"faan", "floating point AAN DCT (experimental / for debugging)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FAAN }, INT_MIN, INT_MAX, V|E, "dct"},
/* Inverse DCT selection and its named values. */
{"idct", "select IDCT implementation", OFFSET(idct_algo), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|E|D, "idct"},
{"auto", "autoselect a good one", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_AUTO }, INT_MIN, INT_MAX, V|E|D, "idct"},
{"int", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_INT }, INT_MIN, INT_MAX, V|E|D, "idct"},
{"simple", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLE }, INT_MIN, INT_MAX, V|E|D, "idct"},
{"simplemmx", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEMMX }, INT_MIN, INT_MAX, V|E|D, "idct"},
{"arm", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_ARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
{"altivec", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_ALTIVEC }, INT_MIN, INT_MAX, V|E|D, "idct"},
{"simplearm", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
{"simplearmv5te", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV5TE }, INT_MIN, INT_MAX, V|E|D, "idct"},
{"simplearmv6", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV6 }, INT_MIN, INT_MAX, V|E|D, "idct"},
{"simpleneon", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLENEON }, INT_MIN, INT_MAX, V|E|D, "idct"},
{"xvid", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVID }, INT_MIN, INT_MAX, V|E|D, "idct"},
/* NOTE(review): "xvidmmx" carries the same value as "xvid" (FF_IDCT_XVID) --
 * presumably a retained legacy alias; confirm this is intentional. */
{"xvidmmx", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVID }, INT_MIN, INT_MAX, V|E|D, "idct"},
{"faani", "floating point AAN IDCT (experimental / for debugging)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_FAAN }, INT_MIN, INT_MAX, V|D|E, "idct"},
{"simpleauto", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEAUTO }, INT_MIN, INT_MAX, V|E|D, "idct"},
/* Default 8, accepted range 0..14; no V/A/E/D flag is set on this row. */
{"bits_per_sample", "", OFFSET(bits_per_sample), AV_OPT_TYPE_INT, {.i64 = 8 }, 0, 14, 0,},
{NULL},
};
/*
 * AVClass instance for AVDCT contexts: names the class "AVDCT" and attaches
 * the avdct_options table to it.
 */
static const AVClass avdct_class = {
.class_name = "AVDCT",
.option = avdct_options,
.version = LIBAVUTIL_VERSION_INT,
};
/**
 * Return the AVClass used by AVDCT contexts.
 *
 * @return pointer to the file-local avdct_class object, the same one that
 *         avcodec_dct_alloc() stores in AVDCT.av_class; never NULL.
 */
const AVClass *avcodec_dct_get_class(void)
{
return &avdct_class;
}
/**
 * Allocate a zero-filled AVDCT, attach its AVClass and load the AVOption
 * defaults into it.
 *
 * @return the new context, or NULL if the allocation failed.
 */
AVDCT *avcodec_dct_alloc(void)
{
    AVDCT *ctx = av_mallocz(sizeof(*ctx));

    if (ctx) {
        /* The class pointer must be in place before defaults are applied. */
        ctx->av_class = &avdct_class;
        av_opt_set_defaults(ctx);
    }

    return ctx;
}
/**
 * Fill in the function pointers (and the IDCT permutation table) of an AVDCT.
 *
 * A temporary AVCodecContext is created, given the algorithm choices and bit
 * depth stored in @p dsp, and handed to each DSP initializer that is enabled
 * at build time. The members of interest are then copied out of the
 * initializer's context into @p dsp, and the temporary context is released.
 * Members belonging to a component that is compiled out are left untouched.
 *
 * @param dsp context to fill; its dct_algo, idct_algo and bits_per_sample
 *            fields are read.
 * @return 0 on success, AVERROR(ENOMEM) if the temporary codec context
 *         cannot be allocated.
 */
int avcodec_dct_init(AVDCT *dsp)
{
    AVCodecContext *codec_ctx = avcodec_alloc_context3(NULL);

    if (!codec_ctx)
        return AVERROR(ENOMEM);

    codec_ctx->bits_per_raw_sample = dsp->bits_per_sample;
    codec_ctx->dct_algo            = dsp->dct_algo;
    codec_ctx->idct_algo           = dsp->idct_algo;

/* Copy one identically named member from a DSP context into dsp. */
#define COPY(src, name) memcpy(&dsp->name, &src.name, sizeof(dsp->name))

#if CONFIG_IDCTDSP
    {
        IDCTDSPContext idct_ctx;

        ff_idctdsp_init(&idct_ctx, codec_ctx);
        COPY(idct_ctx, idct);
        COPY(idct_ctx, idct_permutation);
    }
#endif

#if CONFIG_FDCTDSP
    {
        FDCTDSPContext fdct_ctx;

        ff_fdctdsp_init(&fdct_ctx, codec_ctx);
        COPY(fdct_ctx, fdct);
    }
#endif

#if CONFIG_PIXBLOCKDSP
    {
        PixblockDSPContext pix_ctx;

        ff_pixblockdsp_init(&pix_ctx, codec_ctx);
        COPY(pix_ctx, get_pixels);
    }
#endif

    avcodec_free_context(&codec_ctx);

    return 0;
}

View File

@ -0,0 +1,84 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AVDCT_H
#define AVCODEC_AVDCT_H
#include "libavutil/opt.h"
/**
* AVDCT context.
* @note function pointers can be NULL if the specific features have been
* disabled at build time.
*/
typedef struct AVDCT {
/** Class pointer for the AVOptions machinery; set by avcodec_dct_alloc(). */
const AVClass *av_class;
/**
 * Inverse DCT function.
 * Copied from the IDCT DSP context by avcodec_dct_init() when CONFIG_IDCTDSP
 * is enabled; otherwise avcodec_dct_init() does not touch it.
 */
void (*idct)(int16_t *block /* align 16 */);
/**
 * IDCT input permutation.
 * Several optimized IDCTs need a permuted input (relative to the
 * normal order of the reference IDCT).
 * This permutation must be performed before the idct_put/add.
 * Note, normally this can be merged with the zigzag/alternate scan.
 *
 * An example to avoid confusion:
 * - (->decode coeffs -> zigzag reorder -> dequant -> reference IDCT -> ...)
 * - (x -> reference DCT -> reference IDCT -> x)
 * - (x -> reference DCT -> simple_mmx_perm = idct_permutation
 * -> simple_idct_mmx -> x)
 * - (-> decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant
 * -> simple_idct_mmx -> ...)
 */
uint8_t idct_permutation[64];
/**
 * Forward DCT function.
 * Copied from the FDCT DSP context by avcodec_dct_init() when CONFIG_FDCTDSP
 * is enabled; otherwise avcodec_dct_init() does not touch it.
 */
void (*fdct)(int16_t *block /* align 16 */);
/**
 * DCT algorithm.
 * must use AVOptions to set this field.
 */
int dct_algo;
/**
 * IDCT algorithm.
 * must use AVOptions to set this field.
 */
int idct_algo;
/**
 * Pixel-fetch function taking a destination block, a source pixel pointer
 * and a line size.
 * Copied from the pixblock DSP context by avcodec_dct_init() when
 * CONFIG_PIXBLOCKDSP is enabled; otherwise avcodec_dct_init() does not
 * touch it.
 */
void (*get_pixels)(int16_t *block /* align 16 */,
const uint8_t *pixels /* align 8 */,
ptrdiff_t line_size);
/**
 * Bit depth forwarded by avcodec_dct_init() as bits_per_raw_sample of the
 * temporary codec context. The "bits_per_sample" AVOption defaults to 8 and
 * accepts 0..14.
 */
int bits_per_sample;
} AVDCT;
/**
* Allocates an AVDCT context.
* This needs to be initialized with avcodec_dct_init() after optionally
* configuring it with AVOptions.
*
* To free it use av_free()
*/
AVDCT *avcodec_dct_alloc(void);
int avcodec_dct_init(AVDCT *);
const AVClass *avcodec_dct_get_class(void);
#endif /* AVCODEC_AVDCT_H */

View File

@ -7,5 +7,8 @@ static const AVCodec * const codec_list[] = {
#endif
#if CONFIG_FLAC_DECODER
&ff_flac_decoder,
#endif
#if CONFIG_MP3_DECODER
&ff_mp3_decoder,
#endif
NULL };

View File

@ -0,0 +1,224 @@
/*
* (I)DCT Transforms
* Copyright (c) 2009 Peter Ross <pross@xvid.org>
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
* Copyright (c) 2010 Vitor Sessak
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* (Inverse) Discrete Cosine Transforms. These are also known as the
* type II and type III DCTs respectively.
*/
#include <math.h>
#include <string.h>
#include "libavutil/mathematics.h"
#include "dct.h"
#include "dct32.h"
/* sin(M_PI * x / (2 * n)) */
#define SIN(s, n, x) (s->costab[(n) - (x)])
/* cos(M_PI * x / (2 * n)) */
#define COS(s, n, x) (s->costab[x])
/**
 * In-place transform installed by ff_dct_init() for the DST_I type.
 *
 * Works on n = 1 << ctx->nbits samples: the input is folded into a sequence
 * for the real DFT in ctx->rdft, that DFT is run, and its output is unpacked
 * back into @p data. This function itself indexes data[0] .. data[n - 1].
 *
 * @param ctx  context providing nbits, the costab lookup table and rdft
 * @param data samples, overwritten with the result
 */
static void dst_calc_I_c(DCTContext *ctx, FFTSample *data)
{
int n = 1 << ctx->nbits;
int i;
/* The value originally in data[0] is discarded. */
data[0] = 0;
/* Pre-processing: combine the mirrored pair (i, n - i) using the table
 * entry SIN(ctx, n, 2 * i), i.e. costab[n - 2 * i]. */
for (i = 1; i < n / 2; i++) {
float tmp1 = data[i ];
float tmp2 = data[n - i];
float s = SIN(ctx, n, 2 * i);
s *= tmp1 + tmp2;
tmp1 = (tmp1 - tmp2) * 0.5f;
data[i] = s + tmp1;
data[n - i] = s - tmp1;
}
/* The middle sample has no mirror partner and is simply doubled. */
data[n / 2] *= 2;
ctx->rdft.rdft_calc(&ctx->rdft, data);
/* Post-processing of the DFT output: even slots accumulate a running sum
 * (data[i + 1] += data[i - 1]), odd slots take the negated value two
 * positions ahead. The loop walks upward so data[i + 2] is still the raw
 * DFT output when it is read. */
data[0] *= 0.5f;
for (i = 1; i < n - 2; i += 2) {
data[i + 1] += data[i - 1];
data[i] = -data[i + 2];
}
data[n - 1] = 0;
}
/**
 * In-place transform installed by ff_dct_init() for the DCT_I type.
 *
 * With n = 1 << ctx->nbits, this routine reads and writes data[0] .. data[n]
 * inclusive, so @p data must provide n + 1 samples.
 *
 * @param ctx  context providing nbits, the costab lookup table and rdft
 * @param data samples, overwritten with the result
 */
static void dct_calc_I_c(DCTContext *ctx, FFTSample *data)
{
int n = 1 << ctx->nbits;
int i;
/* 'next' accumulates the value that ends up in data[1]; it is seeded from
 * the two end samples and then collects the cosine-weighted differences
 * from the loop below. */
float next = -0.5f * (data[0] - data[n]);
/* Pre-processing: fold the mirrored pair (i, n - i). At i == 0 this pairs
 * data[0] with data[n]. */
for (i = 0; i < n / 2; i++) {
float tmp1 = data[i];
float tmp2 = data[n - i];
float s = SIN(ctx, n, 2 * i);
float c = COS(ctx, n, 2 * i);
c *= tmp1 - tmp2;
s *= tmp1 - tmp2;
next += c;
tmp1 = (tmp1 + tmp2) * 0.5f;
data[i] = tmp1 - s;
data[n - i] = tmp1 + s;
}
ctx->rdft.rdft_calc(&ctx->rdft, data);
/* Move the DFT's data[1] into slot n, then put the accumulated value in
 * slot 1. */
data[n] = data[1];
data[1] = next;
/* Remaining odd slots are a running difference: each uses the already
 * finalized data[i - 2]. */
for (i = 3; i <= n; i += 2)
data[i] = data[i - 2] - data[i];
}
/**
 * In-place transform installed by ff_dct_init() for the DCT_III type.
 *
 * Operates on n = 1 << ctx->nbits samples, data[0] .. data[n - 1]. The output
 * is scaled by 1 / n and combined with the ctx->csc2 table that ff_dct_init()
 * fills with 0.5 / sin(M_PI * (2 * i + 1) / (2 * n)).
 *
 * @param ctx  context providing nbits, costab, csc2 and rdft
 * @param data samples, overwritten with the result
 */
static void dct_calc_III_c(DCTContext *ctx, FFTSample *data)
{
int n = 1 << ctx->nbits;
int i;
/* Saved up front: the first loop iteration (i == n - 2) overwrites
 * data[n - 1]. */
float next = data[n - 1];
float inv_n = 1.0f / n;
/* Pre-processing, walking downward in pairs. Iteration i writes only
 * data[i] and data[i + 1], so data[i - 1] is still the original input when
 * it is read. */
for (i = n - 2; i >= 2; i -= 2) {
float val1 = data[i];
float val2 = data[i - 1] - data[i + 1];
float c = COS(ctx, n, i);
float s = SIN(ctx, n, i);
data[i] = c * val1 + s * val2;
data[i + 1] = s * val1 - c * val2;
}
data[1] = 2 * next;
ctx->rdft.rdft_calc(&ctx->rdft, data);
/* Post-processing: scale by 1 / n and recombine the mirrored pair
 * (i, n - i - 1) with the csc2 weight. */
for (i = 0; i < n / 2; i++) {
float tmp1 = data[i] * inv_n;
float tmp2 = data[n - i - 1] * inv_n;
float csc = ctx->csc2[i] * (tmp1 - tmp2);
tmp1 += tmp2;
data[i] = tmp1 + csc;
data[n - i - 1] = tmp1 - csc;
}
}
/**
 * In-place transform installed by ff_dct_init() for the DCT_II type, except
 * when nbits == 5, where ff_dct_init() installs dct32_func instead.
 *
 * Operates on n = 1 << ctx->nbits samples, data[0] .. data[n - 1].
 *
 * @param ctx  context providing nbits, the costab lookup table and rdft
 * @param data samples, overwritten with the result
 */
static void dct_calc_II_c(DCTContext *ctx, FFTSample *data)
{
int n = 1 << ctx->nbits;
int i;
float next;
/* Pre-processing: fold the mirrored pair (i, n - i - 1) using the table
 * entry SIN(ctx, n, 2 * i + 1). */
for (i = 0; i < n / 2; i++) {
float tmp1 = data[i];
float tmp2 = data[n - i - 1];
float s = SIN(ctx, n, 2 * i + 1);
s *= tmp1 - tmp2;
tmp1 = (tmp1 + tmp2) * 0.5f;
data[i] = tmp1 + s;
data[n-i-1] = tmp1 - s;
}
ctx->rdft.rdft_calc(&ctx->rdft, data);
/* Seed the running value from the DFT's data[1], then negate that slot
 * before it is consumed as 'ini' in the final (i == 0) iteration. */
next = data[1] * 0.5;
data[1] *= -1;
/* Post-processing, walking downward in pairs: the even slot gets the
 * rotated pair, the odd slot gets the running value accumulated so far,
 * which is then updated for the next (lower) pair. */
for (i = n - 2; i >= 0; i -= 2) {
float inr = data[i ];
float ini = data[i + 1];
float c = COS(ctx, n, i);
float s = SIN(ctx, n, i);
data[i] = c * inr + s * ini;
data[i + 1] = next;
next += s * inr - c * ini;
}
}
/**
 * Adapter that gives ctx->dct32 the (ctx, data) calling shape of dct_calc.
 * The same buffer is passed as both arguments, so the transform runs in
 * place. ff_dct_init() installs this when inverse == DCT_II and nbits == 5.
 */
static void dct32_func(DCTContext *ctx, FFTSample *data)
{
ctx->dct32(data, data);
}
/**
 * Set up a DCT/DST context.
 *
 * The context is cleared first. For DCT_II with nbits == 5 the dedicated
 * 32-point routine is installed and no tables or RDFT are needed. For every
 * other combination the cosine table for nbits + 2 is initialized, the csc2
 * table of n / 2 entries is allocated and filled, the embedded RDFT context
 * is initialized, and dct_calc is chosen according to @p inverse.
 *
 * @param s       context to initialize; any previous contents are overwritten
 * @param nbits   log2 of the transform size (n = 1 << nbits)
 * @param inverse transform type (DCT_I, DCT_II, DCT_III or DST_I)
 * @return 0 on success; AVERROR(ENOMEM) if the csc2 table cannot be
 *         allocated; otherwise the negative error code reported by
 *         ff_rdft_init(). Earlier code returned a bare -1 in that last case,
 *         which hid the real cause from callers.
 */
av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
{
    int n = 1 << nbits;
    int i;
    int ret;

    memset(s, 0, sizeof(*s));

    s->nbits   = nbits;
    s->inverse = inverse;

    if (inverse == DCT_II && nbits == 5) {
        s->dct_calc = dct32_func;
    } else {
        ff_init_ff_cos_tabs(nbits + 2);

        s->costab = ff_cos_tabs[nbits + 2];
        s->csc2   = av_malloc_array(n / 2, sizeof(FFTSample));
        if (!s->csc2)
            return AVERROR(ENOMEM);

        /* Propagate the callee's error code instead of flattening it to -1;
         * release csc2 first so a failed init leaves nothing allocated. */
        ret = ff_rdft_init(&s->rdft, nbits, inverse == DCT_III);
        if (ret < 0) {
            av_freep(&s->csc2);
            return ret;
        }

        for (i = 0; i < n / 2; i++)
            s->csc2[i] = 0.5 / sin((M_PI / (2 * n) * (2 * i + 1)));

        switch (inverse) {
        case DCT_I  : s->dct_calc = dct_calc_I_c;   break;
        case DCT_II : s->dct_calc = dct_calc_II_c;  break;
        case DCT_III: s->dct_calc = dct_calc_III_c; break;
        case DST_I  : s->dct_calc = dst_calc_I_c;   break;
        }
    }

    /* Installed unconditionally; the x86 init below is given the context
     * afterwards and may replace function pointers. */
    s->dct32 = ff_dct32_float;
    if (ARCH_X86)
        ff_dct_init_x86(s);

    return 0;
}
/**
 * Release what ff_dct_init() set up: tears down the embedded RDFT context
 * and frees the csc2 table (av_freep also resets the pointer to NULL).
 */
av_cold void ff_dct_end(DCTContext *s)
{
ff_rdft_end(&s->rdft);
av_freep(&s->csc2);
}

View File

@ -0,0 +1,25 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_DCT32_H
#define AVCODEC_DCT32_H
void ff_dct32_float(float *dst, const float *src);
void ff_dct32_fixed(int *dst, const int *src);
#endif /* AVCODEC_DCT32_H */

View File

@ -0,0 +1,20 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define DCT32_FLOAT 0
#include "dct32_template.c"

View File

@ -0,0 +1,20 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Instantiate the template in floating point mode: with DCT32_FLOAT set to 1
 * it defines ff_dct32_float(), operating on float samples. */
#define DCT32_FLOAT 1
#include "dct32_template.c"

View File

@ -0,0 +1,288 @@
/*
* Template for the Discrete Cosine Transform for 32 samples
* Copyright (c) 2001, 2002 Fabrice Bellard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "dct32.h"
#include "mathops.h"
#include "libavutil/internal.h"
/* Integer types used for intermediate arithmetic in the fixed-point build:
 * signed when CHECKED is defined, unsigned otherwise.
 * NOTE(review): presumably unsigned is the default so that overflow wraps
 * instead of being undefined, with CHECKED meant for overflow-checking
 * builds - confirm. */
#ifdef CHECKED
#define SUINT int
#define SUINT32 int32_t
#else
#define SUINT unsigned
#define SUINT32 uint32_t
#endif
/* Per-instantiation names and arithmetic, selected by DCT32_FLOAT:
 *   dct32          name of the generated function
 *   FIXHR(x)       converts a coefficient constant to the working type
 *                  (fixed-point: scaled by 2^32 and rounded to int)
 *   MULH3(x, y, s) multiplies x by the coefficient y and the rescale factor s
 *                  (fixed-point: via MULH(), which is not defined in this file)
 *   INTFLOAT       sample type of the function's parameters
 *   SUINTFLOAT     type used for the internal computations */
#if DCT32_FLOAT
# define dct32 ff_dct32_float
# define FIXHR(x) ((float)(x))
# define MULH3(x, y, s) ((s)*(y)*(x))
# define INTFLOAT float
# define SUINTFLOAT float
#else
# define dct32 ff_dct32_fixed
# define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5))
# define MULH3(x, y, s) MULH((s)*(x), y)
# define INTFLOAT int
# define SUINTFLOAT SUINT
#endif
/* COSj_i = 1.0 / (2.0 * cos(pi*(2*i+1) / 2^(6 - j))), i.e. for j = 0 the
 * angles are the odd multiples of pi/64.
 *
 * Every value is written pre-divided by a power of two (2, 4, 8, 16 or 32);
 * the butterfly that uses it passes the matching shift count as the last
 * argument of BF()/BF0(), which multiplies the factor back in. This keeps
 * each scaled constant below 0.5, so the 2^32 scaling done by the
 * fixed-point FIXHR() still fits in an int (assuming a 32-bit int). */
#define COS0_0 FIXHR(0.50060299823519630134/2)
#define COS0_1 FIXHR(0.50547095989754365998/2)
#define COS0_2 FIXHR(0.51544730992262454697/2)
#define COS0_3 FIXHR(0.53104259108978417447/2)
#define COS0_4 FIXHR(0.55310389603444452782/2)
#define COS0_5 FIXHR(0.58293496820613387367/2)
#define COS0_6 FIXHR(0.62250412303566481615/2)
#define COS0_7 FIXHR(0.67480834145500574602/2)
#define COS0_8 FIXHR(0.74453627100229844977/2)
#define COS0_9 FIXHR(0.83934964541552703873/2)
#define COS0_10 FIXHR(0.97256823786196069369/2)
#define COS0_11 FIXHR(1.16943993343288495515/4)
#define COS0_12 FIXHR(1.48416461631416627724/4)
#define COS0_13 FIXHR(2.05778100995341155085/8)
#define COS0_14 FIXHR(3.40760841846871878570/8)
#define COS0_15 FIXHR(10.19000812354805681150/32)
#define COS1_0 FIXHR(0.50241928618815570551/2)
#define COS1_1 FIXHR(0.52249861493968888062/2)
#define COS1_2 FIXHR(0.56694403481635770368/2)
#define COS1_3 FIXHR(0.64682178335999012954/2)
#define COS1_4 FIXHR(0.78815462345125022473/2)
#define COS1_5 FIXHR(1.06067768599034747134/4)
#define COS1_6 FIXHR(1.72244709823833392782/4)
#define COS1_7 FIXHR(5.10114861868916385802/16)
#define COS2_0 FIXHR(0.50979557910415916894/2)
#define COS2_1 FIXHR(0.60134488693504528054/2)
#define COS2_2 FIXHR(0.89997622313641570463/2)
#define COS2_3 FIXHR(2.56291544774150617881/8)
#define COS3_0 FIXHR(0.54119610014619698439/2)
#define COS3_1 FIXHR(1.30656296487637652785/4)
#define COS4_0 FIXHR(M_SQRT1_2/2)
/*
 * Butterfly operators.
 *
 * All of them work on the locals valN, tmp0 and tmp1 of the function that
 * uses them. The multi-statement ones are wrapped in do { } while (0) so
 * that each invocation followed by ';' is exactly one statement and can be
 * used safely as the body of an if/else.
 */

/* In-place butterfly on val<a>/val<b>:
 *   val<a> = val<a> + val<b>
 *   val<b> = (val<a> - val<b>) * c * 2^s   (via MULH3) */
#define BF(a, b, c, s)\
do {\
    tmp0 = val##a + val##b;\
    tmp1 = val##a - val##b;\
    val##a = tmp0;\
    val##b = MULH3(tmp1, c, 1<<(s));\
} while (0)

/* Same as BF(), but the operands are read from tab[a]/tab[b] and the
 * results are stored into val<a>/val<b>. */
#define BF0(a, b, c, s)\
do {\
    tmp0 = tab[a] + tab[b];\
    tmp1 = tab[a] - tab[b];\
    val##a = tmp0;\
    val##b = MULH3(tmp1, c, 1<<(s));\
} while (0)

/* Two COS4_0 butterflies on (a, b) and (c, d), followed by val<c> += val<d>. */
#define BF1(a, b, c, d)\
do {\
    BF(a, b, COS4_0, 1);\
    BF(c, d,-COS4_0, 1);\
    val##c += val##d;\
} while (0)

/* Like BF1(), followed by the extra accumulation chain a += c, c += b, b += d. */
#define BF2(a, b, c, d)\
do {\
    BF(a, b, COS4_0, 1);\
    BF(c, d,-COS4_0, 1);\
    val##c += val##d;\
    val##a += val##c;\
    val##c += val##b;\
    val##b += val##d;\
} while (0)

/* val<a> += val<b> */
#define ADD(a, b) val##a += val##b
/**
 * DCT32 without 1/sqrt(2) coef zero scaling.
 *
 * Depending on DCT32_FLOAT this is compiled as ff_dct32_float() or
 * ff_dct32_fixed(). The transform is fully unrolled: the 32 working values
 * live in the locals val0..val31 and are combined with the BF*()/ADD()
 * macros. The butterflies of passes 1 to 4 are interleaved, so each group
 * of values goes through the earlier passes right before a later pass
 * consumes it.
 *
 * All reads of the input happen in the BF0() calls of pass 1; the first
 * store to out[] comes after the last of them.
 *
 * @param out     receives the 32 output coefficients
 * @param tab_arg the 32 input samples
 */
void dct32(INTFLOAT *out, const INTFLOAT *tab_arg)
{
/* the input is read through the internal computation type */
const SUINTFLOAT *tab = tab_arg;
/* scratch values used inside BF()/BF0() */
SUINTFLOAT tmp0, tmp1;
SUINTFLOAT val0 , val1 , val2 , val3 , val4 , val5 , val6 , val7 ,
val8 , val9 , val10, val11, val12, val13, val14, val15,
val16, val17, val18, val19, val20, val21, val22, val23,
val24, val25, val26, val27, val28, val29, val30, val31;
/* pass 1: pairs input i with input 31 - i (COS0_i) */
BF0( 0, 31, COS0_0 , 1);
BF0(15, 16, COS0_15, 5);
/* pass 2 */
BF( 0, 15, COS1_0 , 1);
BF(16, 31,-COS1_0 , 1);
/* pass 1 */
BF0( 7, 24, COS0_7 , 1);
BF0( 8, 23, COS0_8 , 1);
/* pass 2 */
BF( 7, 8, COS1_7 , 4);
BF(23, 24,-COS1_7 , 4);
/* pass 3 */
BF( 0, 7, COS2_0 , 1);
BF( 8, 15,-COS2_0 , 1);
BF(16, 23, COS2_0 , 1);
BF(24, 31,-COS2_0 , 1);
/* pass 1 */
BF0( 3, 28, COS0_3 , 1);
BF0(12, 19, COS0_12, 2);
/* pass 2 */
BF( 3, 12, COS1_3 , 1);
BF(19, 28,-COS1_3 , 1);
/* pass 1 */
BF0( 4, 27, COS0_4 , 1);
BF0(11, 20, COS0_11, 2);
/* pass 2 */
BF( 4, 11, COS1_4 , 1);
BF(20, 27,-COS1_4 , 1);
/* pass 3 */
BF( 3, 4, COS2_3 , 3);
BF(11, 12,-COS2_3 , 3);
BF(19, 20, COS2_3 , 3);
BF(27, 28,-COS2_3 , 3);
/* pass 4 */
BF( 0, 3, COS3_0 , 1);
BF( 4, 7,-COS3_0 , 1);
BF( 8, 11, COS3_0 , 1);
BF(12, 15,-COS3_0 , 1);
BF(16, 19, COS3_0 , 1);
BF(20, 23,-COS3_0 , 1);
BF(24, 27, COS3_0 , 1);
BF(28, 31,-COS3_0 , 1);
/* pass 1 */
BF0( 1, 30, COS0_1 , 1);
BF0(14, 17, COS0_14, 3);
/* pass 2 */
BF( 1, 14, COS1_1 , 1);
BF(17, 30,-COS1_1 , 1);
/* pass 1 */
BF0( 6, 25, COS0_6 , 1);
BF0( 9, 22, COS0_9 , 1);
/* pass 2 */
BF( 6, 9, COS1_6 , 2);
BF(22, 25,-COS1_6 , 2);
/* pass 3 */
BF( 1, 6, COS2_1 , 1);
BF( 9, 14,-COS2_1 , 1);
BF(17, 22, COS2_1 , 1);
BF(25, 30,-COS2_1 , 1);
/* pass 1 */
BF0( 2, 29, COS0_2 , 1);
BF0(13, 18, COS0_13, 3);
/* pass 2 */
BF( 2, 13, COS1_2 , 1);
BF(18, 29,-COS1_2 , 1);
/* pass 1 */
BF0( 5, 26, COS0_5 , 1);
BF0(10, 21, COS0_10, 1);
/* pass 2 */
BF( 5, 10, COS1_5 , 2);
BF(21, 26,-COS1_5 , 2);
/* pass 3 */
BF( 2, 5, COS2_2 , 1);
BF(10, 13,-COS2_2 , 1);
BF(18, 21, COS2_2 , 1);
BF(26, 29,-COS2_2 , 1);
/* pass 4 */
BF( 1, 2, COS3_1 , 2);
BF( 5, 6,-COS3_1 , 2);
BF( 9, 10, COS3_1 , 2);
BF(13, 14,-COS3_1 , 2);
BF(17, 18, COS3_1 , 2);
BF(21, 22,-COS3_1 , 2);
BF(25, 26, COS3_1 , 2);
BF(29, 30,-COS3_1 , 2);
/* pass 5: COS4_0 butterflies on each group of four values */
BF1( 0, 1, 2, 3);
BF2( 4, 5, 6, 7);
BF1( 8, 9, 10, 11);
BF2(12, 13, 14, 15);
BF1(16, 17, 18, 19);
BF2(20, 21, 22, 23);
BF1(24, 25, 26, 27);
BF2(28, 29, 30, 31);
/* pass 6: accumulation chain over val8..val15; each ADD uses the result of
 * the previous one, so the order matters */
ADD( 8, 12);
ADD(12, 10);
ADD(10, 14);
ADD(14, 9);
ADD( 9, 13);
ADD(13, 11);
ADD(11, 15);
/* even outputs: valN goes to the index obtained by reversing the five bits
 * of N */
out[ 0] = val0;
out[16] = val1;
out[ 8] = val2;
out[24] = val3;
out[ 4] = val4;
out[20] = val5;
out[12] = val6;
out[28] = val7;
out[ 2] = val8;
out[18] = val9;
out[10] = val10;
out[26] = val11;
out[ 6] = val12;
out[22] = val13;
out[14] = val14;
out[30] = val15;
/* same accumulation chain for val24..val31 */
ADD(24, 28);
ADD(28, 26);
ADD(26, 30);
ADD(30, 25);
ADD(25, 29);
ADD(29, 27);
ADD(27, 31);
/* odd outputs: each one (except out[31]) is the sum of two values from
 * val16..val31 */
out[ 1] = val16 + val24;
out[17] = val17 + val25;
out[ 9] = val18 + val26;
out[25] = val19 + val27;
out[ 5] = val20 + val28;
out[21] = val21 + val29;
out[13] = val22 + val30;
out[29] = val23 + val31;
out[ 3] = val24 + val20;
out[19] = val25 + val21;
out[11] = val26 + val22;
out[27] = val27 + val23;
out[ 7] = val28 + val18;
out[23] = val29 + val19;
out[15] = val30 + val17;
out[31] = val31;
}

View File

@ -0,0 +1,215 @@
/*
* Floating point AAN DCT
* this implementation is based upon the IJG integer AAN DCT (see jfdctfst.c)
*
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
* Copyright (c) 2003 Roman Shaposhnik
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/**
* @file
* @brief
* Floating point AAN DCT
* @author Michael Niedermayer <michaelni@gmx.at>
*/
#include "faandct.h"
#include "libavutil/internal.h"
#include "libavutil/libm.h"
/* Working precision of the transform. */
typedef float FLOAT;
/* Numbers generated by arbitrary precision arithmetic followed by truncation
to 36 fractional digits (enough for a 128-bit IEEE quad, see /usr/include/math.h
for this approach). Unfortunately, long double is not always available correctly,
e.g. ppc has issues.
TODO: add L suffixes when ppc and toolchains sort out their stuff.
*/
#define B0 1.000000000000000000000000000000000000
#define B1 0.720959822006947913789091890943021267 // (cos(pi*1/16)sqrt(2))^-1
#define B2 0.765366864730179543456919968060797734 // (cos(pi*2/16)sqrt(2))^-1
#define B3 0.850430094767256448766702844371412325 // (cos(pi*3/16)sqrt(2))^-1
#define B4 1.000000000000000000000000000000000000 // (cos(pi*4/16)sqrt(2))^-1
#define B5 1.272758580572833938461007018281767032 // (cos(pi*5/16)sqrt(2))^-1
#define B6 1.847759065022573512256366378793576574 // (cos(pi*6/16)sqrt(2))^-1
#define B7 3.624509785411551372409941227504289587 // (cos(pi*7/16)sqrt(2))^-1
/* Rotation constants used inside the butterflies. */
#define A1 M_SQRT1_2 // cos(pi*4/16)
#define A2 0.54119610014619698435 // cos(pi*6/16)sqrt(2)
#define A5 0.38268343236508977170 // cos(pi*6/16)
#define A4 1.30656296487637652774 // cos(pi*2/16)sqrt(2)
/* postscale[8*r + c] = Br * Bc: the factor applied to the coefficient in
 * row r, column c after the column pass, right before rounding to int16_t. */
static const FLOAT postscale[64]={
B0*B0, B0*B1, B0*B2, B0*B3, B0*B4, B0*B5, B0*B6, B0*B7,
B1*B0, B1*B1, B1*B2, B1*B3, B1*B4, B1*B5, B1*B6, B1*B7,
B2*B0, B2*B1, B2*B2, B2*B3, B2*B4, B2*B5, B2*B6, B2*B7,
B3*B0, B3*B1, B3*B2, B3*B3, B3*B4, B3*B5, B3*B6, B3*B7,
B4*B0, B4*B1, B4*B2, B4*B3, B4*B4, B4*B5, B4*B6, B4*B7,
B5*B0, B5*B1, B5*B2, B5*B3, B5*B4, B5*B5, B5*B6, B5*B7,
B6*B0, B6*B1, B6*B2, B6*B3, B6*B4, B6*B5, B6*B6, B6*B7,
B7*B0, B7*B1, B7*B2, B7*B3, B7*B4, B7*B5, B7*B6, B7*B7,
};
/**
 * Row pass of the forward transform: applies the 8-point butterfly network
 * to each group of eight consecutive samples.
 *
 * @param temp receives the 64 row-transformed values; no scaling is applied
 * @param data the 64 input samples; only read
 */
static av_always_inline void row_fdct(FLOAT temp[64], int16_t *data)
{
    int row;

    for (row = 0; row < 8; row++) {
        const int16_t *in = data + 8 * row;
        FLOAT *out        = temp + 8 * row;
        /* sums and differences of the mirrored sample pairs; the integer
         * result is converted to FLOAT on assignment */
        FLOAT sum07 = in[0] + in[7];
        FLOAT dif07 = in[0] - in[7];
        FLOAT sum16 = in[1] + in[6];
        FLOAT dif16 = in[1] - in[6];
        FLOAT sum25 = in[2] + in[5];
        FLOAT dif25 = in[2] - in[5];
        FLOAT sum34 = in[3] + in[4];
        FLOAT dif34 = in[3] - in[4];
        /* inputs of the even half */
        FLOAT even_a = sum07 + sum34;
        FLOAT even_d = sum07 - sum34;
        FLOAT even_b = sum16 + sum25;
        FLOAT even_c = sum16 - sum25;
        FLOAT z2, z4, z11, z13;

        /* even half: outputs 0, 4, 2 and 6 */
        out[0] = even_a + even_b;
        out[4] = even_a - even_b;

        even_c += even_d;
        even_c *= A1;

        out[2] = even_d + even_c;
        out[6] = even_d - even_c;

        /* odd half: outputs 5, 3, 1 and 7 */
        dif34 += dif25;
        dif25 += dif16;
        dif16 += dif07;

        z2 = dif34*(A2+A5) - dif16*A5;
        z4 = dif16*(A4-A5) + dif34*A5;

        dif25 *= A1;

        z11 = dif07 + dif25;
        z13 = dif07 - dif25;

        out[5] = z13 + z2;
        out[3] = z13 - z2;
        out[1] = z11 + z4;
        out[7] = z11 - z4;
    }
}
/**
 * Forward 8x8 transform, done in place.
 *
 * row_fdct() produces the row pass in a float buffer. The same butterfly
 * network is then run down each column, and every result is multiplied by
 * its postscale[] entry and rounded with lrintf() back into data.
 *
 * @param data the 64 coefficients; overwritten with the result
 */
void ff_faandct(int16_t *data)
{
    FLOAT temp[64];
    int col;

    emms_c();

    row_fdct(temp, data);

    for (col = 0; col < 8; col++) {
        const FLOAT *in    = temp      + col;
        const FLOAT *scale = postscale + col;
        int16_t     *out   = data      + col;
        /* sums and differences of the mirrored rows of this column */
        FLOAT sum07 = in[8*0] + in[8*7];
        FLOAT dif07 = in[8*0] - in[8*7];
        FLOAT sum16 = in[8*1] + in[8*6];
        FLOAT dif16 = in[8*1] - in[8*6];
        FLOAT sum25 = in[8*2] + in[8*5];
        FLOAT dif25 = in[8*2] - in[8*5];
        FLOAT sum34 = in[8*3] + in[8*4];
        FLOAT dif34 = in[8*3] - in[8*4];
        /* inputs of the even half */
        FLOAT even_a = sum07 + sum34;
        FLOAT even_d = sum07 - sum34;
        FLOAT even_b = sum16 + sum25;
        FLOAT even_c = sum16 - sum25;
        FLOAT z2, z4, z11, z13;

        /* even half: rows 0, 4, 2 and 6 */
        out[8*0] = lrintf(scale[8*0] * (even_a + even_b));
        out[8*4] = lrintf(scale[8*4] * (even_a - even_b));

        even_c += even_d;
        even_c *= A1;

        out[8*2] = lrintf(scale[8*2] * (even_d + even_c));
        out[8*6] = lrintf(scale[8*6] * (even_d - even_c));

        /* odd half: rows 5, 3, 1 and 7 */
        dif34 += dif25;
        dif25 += dif16;
        dif16 += dif07;

        z2 = dif34*(A2+A5) - dif16*A5;
        z4 = dif16*(A4-A5) + dif34*A5;

        dif25 *= A1;

        z11 = dif07 + dif25;
        z13 = dif07 - dif25;

        out[8*5] = lrintf(scale[8*5] * (z13 + z2));
        out[8*3] = lrintf(scale[8*3] * (z13 - z2));
        out[8*1] = lrintf(scale[8*1] * (z11 + z4));
        out[8*7] = lrintf(scale[8*7] * (z11 - z4));
    }
}
/**
 * Variant of ff_faandct() with a different column pass, done in place.
 *
 * After the common row pass, each column is split into the sums and the
 * differences of its adjacent row pairs (0/1, 2/3, 4/5, 6/7). The same
 * 4-point butterfly is applied to both halves: the sums produce rows
 * 0, 4, 2, 6 and the differences produce rows 1, 5, 3, 7. Both halves use
 * the postscale[] entries of rows 0, 4, 2 and 6.
 *
 * @param data the 64 coefficients; overwritten with the result
 */
void ff_faandct248(int16_t *data)
{
    FLOAT temp[64];
    int col;

    emms_c();

    row_fdct(temp, data);

    for (col = 0; col < 8; col++) {
        const FLOAT *in    = temp      + col;
        const FLOAT *scale = postscale + col;
        FLOAT pair[2][4]; /* [0]: sums, [1]: differences of adjacent rows */
        int k, half;

        for (k = 0; k < 4; k++) {
            pair[0][k] = in[8*(2*k)] + in[8*(2*k + 1)];
            pair[1][k] = in[8*(2*k)] - in[8*(2*k + 1)];
        }

        for (half = 0; half < 2; half++) {
            const FLOAT *p = pair[half];
            /* the difference half lands one row below the sum half */
            int16_t *out   = data + col + 8*half;
            FLOAT outer_sum = p[0] + p[3];
            FLOAT inner_sum = p[1] + p[2];
            FLOAT rot       = p[1] - p[2];
            FLOAT outer_dif = p[0] - p[3];

            out[8*0] = lrintf(scale[8*0] * (outer_sum + inner_sum));
            out[8*4] = lrintf(scale[8*4] * (outer_sum - inner_sum));

            rot += outer_dif;
            rot *= A1;

            out[8*2] = lrintf(scale[8*2] * (outer_dif + rot));
            out[8*6] = lrintf(scale[8*6] * (outer_dif - rot));
        }
    }
}

View File

@ -0,0 +1,37 @@
/*
* Floating point AAN DCT
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* @brief
* Floating point AAN DCT
* @author Michael Niedermayer <michaelni@gmx.at>
*/
#ifndef AVCODEC_FAANDCT_H
#define AVCODEC_FAANDCT_H
#include <stdint.h>
/**
 * Forward 8x8 DCT computed in floating point, done in place.
 *
 * @param data the 64 coefficients; overwritten with the rounded result
 */
void ff_faandct(int16_t *data);
/**
 * Variant of ff_faandct() whose column pass transforms the sums and the
 * differences of adjacent row pairs separately, done in place.
 *
 * @param data the 64 coefficients; overwritten with the rounded result
 */
void ff_faandct248(int16_t *data);
#endif /* AVCODEC_FAANDCT_H */

View File

@ -0,0 +1,166 @@
/*
* Floating point AAN IDCT
* Copyright (c) 2008 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "faanidct.h"
#include "libavutil/common.h"
/* To allow switching to double. */
typedef float FLOAT;
/* Per-frequency scale factors folded into the prescale[] table below. */
#define B0 1.0000000000000000000000
#define B1 1.3870398453221474618216 // cos(pi*1/16)sqrt(2)
#define B2 1.3065629648763765278566 // cos(pi*2/16)sqrt(2)
#define B3 1.1758756024193587169745 // cos(pi*3/16)sqrt(2)
#define B4 1.0000000000000000000000 // cos(pi*4/16)sqrt(2)
#define B5 0.7856949583871021812779 // cos(pi*5/16)sqrt(2)
#define B6 0.5411961001461969843997 // cos(pi*6/16)sqrt(2)
#define B7 0.2758993792829430123360 // cos(pi*7/16)sqrt(2)
/* Rotation constants used inside the butterflies. */
#define A4 0.70710678118654752438 // cos(pi*4/16)
#define A2 0.92387953251128675613 // cos(pi*2/16)
/* prescale[8*r + c] = Br * Bc / 8: every input coefficient is multiplied by
 * its entry before the two 1-D passes run. */
static const FLOAT prescale[64]={
B0*B0/8, B0*B1/8, B0*B2/8, B0*B3/8, B0*B4/8, B0*B5/8, B0*B6/8, B0*B7/8,
B1*B0/8, B1*B1/8, B1*B2/8, B1*B3/8, B1*B4/8, B1*B5/8, B1*B6/8, B1*B7/8,
B2*B0/8, B2*B1/8, B2*B2/8, B2*B3/8, B2*B4/8, B2*B5/8, B2*B6/8, B2*B7/8,
B3*B0/8, B3*B1/8, B3*B2/8, B3*B3/8, B3*B4/8, B3*B5/8, B3*B6/8, B3*B7/8,
B4*B0/8, B4*B1/8, B4*B2/8, B4*B3/8, B4*B4/8, B4*B5/8, B4*B6/8, B4*B7/8,
B5*B0/8, B5*B1/8, B5*B2/8, B5*B3/8, B5*B4/8, B5*B5/8, B5*B6/8, B5*B7/8,
B6*B0/8, B6*B1/8, B6*B2/8, B6*B3/8, B6*B4/8, B6*B5/8, B6*B6/8, B6*B7/8,
B7*B0/8, B7*B1/8, B7*B2/8, B7*B3/8, B7*B4/8, B7*B5/8, B7*B6/8, B7*B7/8,
};
/**
 * One pass of eight 8-point inverse transforms over a 64-entry float buffer.
 *
 * @param data   output for type 1 (rounded coefficients); unused otherwise
 * @param temp   values to transform; also the output for type 0
 * @param dest   pixel output for types 2 and 3; unused otherwise
 * @param stride distance between the eight outputs of one transform in dest
 * @param x      distance between the eight elements of one transform in
 *               temp (and in data)
 * @param y      distance between the start of successive transforms
 * @param type   0: store the float results back into temp
 *               1: round the results into data
 *               2: add the rounded results to dest and clip to 8 bits
 *               any other value: store the rounded results in dest, clipped
 *               to 8 bits
 */
static inline void p8idct(int16_t data[64], FLOAT temp[64], uint8_t *dest,
                          ptrdiff_t stride, int x, int y, int type)
{
    const int end = y * 8;
    int pos;

    for (pos = 0; pos < end; pos += y) {
        FLOAT *t = temp + pos;
        /* odd half */
        const FLOAT s17 = t[1*x] + t[7*x];
        const FLOAT d17 = t[1*x] - t[7*x];
        const FLOAT s53 = t[5*x] + t[3*x];
        const FLOAT d53 = t[5*x] - t[3*x];
        FLOAT od07 = s17 + s53;
        FLOAT od25 = (s17 - s53)*(2*A4);
        FLOAT od34 = d17*(2*(B6-A2)) - d53*(2*A2);
        FLOAT od16 = d53*(2*(A2-B2)) + d17*(2*A2);
        FLOAT s26, d26, s04, d04;
        FLOAT os07, os16, os25, os34;

        od16 -= od07;
        od25 -= od16;
        od34 += od25;

        /* even half */
        s26  = t[2*x] + t[6*x];
        d26  = t[2*x] - t[6*x];
        d26 *= 2*A4;
        d26 -= s26;

        s04 = t[0*x] + t[4*x];
        d04 = t[0*x] - t[4*x];

        os07 = s04 + s26;
        os34 = s04 - s26;
        os16 = d04 + d26;
        os25 = d04 - d26;

        /* combine both halves and store according to the requested type;
         * the output pointers are only formed in the branch that uses them,
         * because callers pass NULL for the unused ones */
        switch (type) {
        case 0:
            t[0*x] = os07 + od07;
            t[7*x] = os07 - od07;
            t[1*x] = os16 + od16;
            t[6*x] = os16 - od16;
            t[2*x] = os25 + od25;
            t[5*x] = os25 - od25;
            t[3*x] = os34 - od34;
            t[4*x] = os34 + od34;
            break;
        case 1: {
            int16_t *out = data + pos;

            out[0*x] = lrintf(os07 + od07);
            out[7*x] = lrintf(os07 - od07);
            out[1*x] = lrintf(os16 + od16);
            out[6*x] = lrintf(os16 - od16);
            out[2*x] = lrintf(os25 + od25);
            out[5*x] = lrintf(os25 - od25);
            out[3*x] = lrintf(os34 - od34);
            out[4*x] = lrintf(os34 + od34);
            break;
        }
        case 2: {
            uint8_t *px = dest + pos;

            px[0*stride] = av_clip_uint8(((int)px[0*stride]) + lrintf(os07 + od07));
            px[7*stride] = av_clip_uint8(((int)px[7*stride]) + lrintf(os07 - od07));
            px[1*stride] = av_clip_uint8(((int)px[1*stride]) + lrintf(os16 + od16));
            px[6*stride] = av_clip_uint8(((int)px[6*stride]) + lrintf(os16 - od16));
            px[2*stride] = av_clip_uint8(((int)px[2*stride]) + lrintf(os25 + od25));
            px[5*stride] = av_clip_uint8(((int)px[5*stride]) + lrintf(os25 - od25));
            px[3*stride] = av_clip_uint8(((int)px[3*stride]) + lrintf(os34 - od34));
            px[4*stride] = av_clip_uint8(((int)px[4*stride]) + lrintf(os34 + od34));
            break;
        }
        default: {
            uint8_t *px = dest + pos;

            px[0*stride] = av_clip_uint8(lrintf(os07 + od07));
            px[7*stride] = av_clip_uint8(lrintf(os07 - od07));
            px[1*stride] = av_clip_uint8(lrintf(os16 + od16));
            px[6*stride] = av_clip_uint8(lrintf(os16 - od16));
            px[2*stride] = av_clip_uint8(lrintf(os25 + od25));
            px[5*stride] = av_clip_uint8(lrintf(os25 - od25));
            px[3*stride] = av_clip_uint8(lrintf(os34 - od34));
            px[4*stride] = av_clip_uint8(lrintf(os34 + od34));
            break;
        }
        }
    }
}
/**
 * Inverse 8x8 transform, done in place.
 *
 * @param block the 64 coefficients; overwritten with the rounded result
 */
void ff_faanidct(int16_t block[64])
{
    FLOAT scaled[64];
    int n = 0;

    emms_c();

    while (n < 64) {
        scaled[n] = block[n] * prescale[n];
        n++;
    }

    /* first pass over groups of eight consecutive values, kept as floats */
    p8idct(block, scaled, NULL, 0, 1, 8, 0);
    /* second pass over values eight apart, rounded back into block */
    p8idct(block, scaled, NULL, 0, 8, 1, 1);
}
/**
 * Inverse 8x8 transform whose result is added to existing pixels.
 *
 * @param dest      top-left pixel of the 8x8 area to update; each sum is
 *                  clipped to 8 bits
 * @param line_size distance between vertically adjacent pixels in dest
 * @param block     the 64 input coefficients
 */
void ff_faanidct_add(uint8_t *dest, ptrdiff_t line_size, int16_t block[64])
{
    FLOAT scaled[64];
    int n = 0;

    emms_c();

    while (n < 64) {
        scaled[n] = block[n] * prescale[n];
        n++;
    }

    /* first pass over groups of eight consecutive values, kept as floats */
    p8idct(block, scaled, NULL, 0, 1, 8, 0);
    /* second pass over values eight apart, accumulated into dest */
    p8idct(NULL , scaled, dest, line_size, 8, 1, 2);
}
/**
 * Inverse 8x8 transform whose result overwrites existing pixels.
 *
 * @param dest      top-left pixel of the 8x8 area to write; each value is
 *                  clipped to 8 bits
 * @param line_size distance between vertically adjacent pixels in dest
 * @param block     the 64 input coefficients
 */
void ff_faanidct_put(uint8_t *dest, ptrdiff_t line_size, int16_t block[64])
{
    FLOAT scaled[64];
    int n = 0;

    emms_c();

    while (n < 64) {
        scaled[n] = block[n] * prescale[n];
        n++;
    }

    /* first pass over groups of eight consecutive values, kept as floats */
    p8idct(block, scaled, NULL, 0, 1, 8, 0);
    /* second pass over values eight apart, stored into dest */
    p8idct(NULL , scaled, dest, line_size, 8, 1, 3);
}

View File

@ -0,0 +1,32 @@
/*
* Floating point AAN IDCT
* Copyright (c) 2008 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_FAANIDCT_H
#define AVCODEC_FAANIDCT_H
#include <stddef.h>
#include <stdint.h>
/**
 * Inverse 8x8 DCT computed in floating point, done in place.
 *
 * @param block the 64 coefficients; overwritten with the rounded result
 */
void ff_faanidct(int16_t block[64]);
/**
 * Inverse 8x8 DCT whose rounded result is added to the pixels in dest;
 * each sum is clipped to 8 bits.
 *
 * @param dest      top-left pixel of the 8x8 area to update
 * @param line_size distance between vertically adjacent pixels in dest
 * @param block     the 64 input coefficients
 */
void ff_faanidct_add(uint8_t *dest, ptrdiff_t line_size, int16_t block[64]);
/**
 * Inverse 8x8 DCT whose rounded result, clipped to 8 bits, is stored in dest.
 *
 * @param dest      top-left pixel of the 8x8 area to write
 * @param line_size distance between vertically adjacent pixels in dest
 * @param block     the 64 input coefficients
 */
void ff_faanidct_put(uint8_t *dest, ptrdiff_t line_size, int16_t block[64]);
#endif /* AVCODEC_FAANIDCT_H */

View File

@ -0,0 +1,50 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "avcodec.h"
#include "dct.h"
#include "faandct.h"
#include "fdctdsp.h"
#include "config.h"
/**
 * Select the forward DCT implementations for a codec context.
 *
 * 9 and 10 bit content always gets the 10 bit "islow" functions. Otherwise
 * the choice follows avctx->dct_algo, with the 8 bit "islow" functions as
 * the default. The PPC and x86 initializers run afterwards and may replace
 * the pointers chosen here.
 *
 * @param c     context whose fdct and fdct248 pointers are filled in
 * @param avctx codec context supplying bits_per_raw_sample and dct_algo
 */
av_cold void ff_fdctdsp_init(FDCTDSPContext *c, AVCodecContext *avctx)
{
    const int dct_algo            = avctx->dct_algo;
    const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;

    if (avctx->bits_per_raw_sample == 9 || avctx->bits_per_raw_sample == 10) {
        c->fdct    = ff_jpeg_fdct_islow_10;
        c->fdct248 = ff_fdct248_islow_10;
    } else if (dct_algo == FF_DCT_FASTINT) {
        c->fdct    = ff_fdct_ifast;
        c->fdct248 = ff_fdct_ifast248;
#if CONFIG_FAANDCT
    } else if (dct_algo == FF_DCT_FAAN) {
        c->fdct    = ff_faandct;
        c->fdct248 = ff_faandct248;
#endif /* CONFIG_FAANDCT */
    } else {
        /* slow/accurate/default */
        c->fdct    = ff_jpeg_fdct_islow_8;
        c->fdct248 = ff_fdct248_islow_8;
    }

    /* architecture-specific overrides */
    if (ARCH_PPC)
        ff_fdctdsp_init_ppc(c, avctx, high_bit_depth);
    if (ARCH_X86)
        ff_fdctdsp_init_x86(c, avctx, high_bit_depth);
}

View File

@ -0,0 +1,44 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/fdctdsp.h"
#include "fdct.h"
/**
 * x86 override for the forward DCT pointer.
 *
 * Only applies to non-high-bit-depth content when dct_algo is FF_DCT_AUTO
 * or FF_DCT_MMX. The checks run in the order MMX, MMXEXT, SSE2, so the
 * last matching one wins.
 *
 * @param c              context whose fdct pointer may be replaced
 * @param avctx          codec context supplying dct_algo
 * @param high_bit_depth nonzero to leave the context untouched
 */
av_cold void ff_fdctdsp_init_x86(FDCTDSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    int cpu_flags      = av_get_cpu_flags();
    const int dct_algo = avctx->dct_algo;

    if (high_bit_depth)
        return;
    if (dct_algo != FF_DCT_AUTO && dct_algo != FF_DCT_MMX)
        return;

    if (INLINE_MMX(cpu_flags))
        c->fdct = ff_fdct_mmx;

    if (INLINE_MMXEXT(cpu_flags))
        c->fdct = ff_fdct_mmxext;

    if (INLINE_SSE2(cpu_flags))
        c->fdct = ff_fdct_sse2;
}

View File

@ -0,0 +1,21 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Instantiate fft_template.c with both FFT_FLOAT and FFT_FIXED_32 disabled.
 * NOTE(review): presumably this selects the 16-bit fixed-point FFT -
 * confirm against fft_template.c. */
#define FFT_FLOAT 0
#define FFT_FIXED_32 0
#include "fft_template.c"

View File

@ -0,0 +1,52 @@
/*
* Copyright (c) 2012
* MIPS Technologies, Inc., California.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Authors: Stanislav Ocovaj (socovaj@mips.com)
* Goran Cordasic (goran@mips.com)
* Djordje Pesut (djordje@mips.com)
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Instantiate fft_template.c with FFT_FLOAT disabled and FFT_FIXED_32 enabled.
 * NOTE(review): presumably this selects the 32-bit fixed-point FFT -
 * confirm against fft_template.c. */
#define FFT_FLOAT 0
#define FFT_FIXED_32 1
#include "fft_template.c"

View File

@ -0,0 +1,328 @@
/*
* Copyright (c) 2012
* MIPS Technologies, Inc., California.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Authors: Stanislav Ocovaj (socovaj@mips.com)
* Goran Cordasic (goran@mips.com)
* Djordje Pesut (djordje@mips.com)
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* definitions and initialization of LUT table for FFT
*/
#include "libavcodec/fft_table.h"
const int32_t ff_w_tab_sr[MAX_FFT_SIZE/(4*16)] = {
2147483647, 2147483016, 2147481121, 2147477963, 2147473542, 2147467857, 2147460908, 2147452697,
2147443222, 2147432484, 2147420483, 2147407218, 2147392690, 2147376899, 2147359845, 2147341527,
2147321946, 2147301102, 2147278995, 2147255625, 2147230991, 2147205094, 2147177934, 2147149511,
2147119825, 2147088876, 2147056664, 2147023188, 2146988450, 2146952448, 2146915184, 2146876656,
2146836866, 2146795813, 2146753497, 2146709917, 2146665076, 2146618971, 2146571603, 2146522973,
2146473080, 2146421924, 2146369505, 2146315824, 2146260881, 2146204674, 2146147205, 2146088474,
2146028480, 2145967224, 2145904705, 2145840924, 2145775880, 2145709574, 2145642006, 2145573176,
2145503083, 2145431729, 2145359112, 2145285233, 2145210092, 2145133690, 2145056025, 2144977098,
2144896910, 2144815460, 2144732748, 2144648774, 2144563539, 2144477042, 2144389283, 2144300264,
2144209982, 2144118439, 2144025635, 2143931570, 2143836244, 2143739656, 2143641807, 2143542697,
2143442326, 2143340694, 2143237802, 2143133648, 2143028234, 2142921559, 2142813624, 2142704427,
2142593971, 2142482254, 2142369276, 2142255039, 2142139541, 2142022783, 2141904764, 2141785486,
2141664948, 2141543150, 2141420092, 2141295774, 2141170197, 2141043360, 2140915264, 2140785908,
2140655293, 2140523418, 2140390284, 2140255892, 2140120240, 2139983329, 2139845159, 2139705730,
2139565043, 2139423097, 2139279892, 2139135429, 2138989708, 2138842728, 2138694490, 2138544994,
2138394240, 2138242228, 2138088958, 2137934430, 2137778644, 2137621601, 2137463301, 2137303743,
2137142927, 2136980855, 2136817525, 2136652938, 2136487095, 2136319994, 2136151637, 2135982023,
2135811153, 2135639026, 2135465642, 2135291003, 2135115107, 2134937956, 2134759548, 2134579885,
2134398966, 2134216791, 2134033361, 2133848675, 2133662734, 2133475538, 2133287087, 2133097381,
2132906420, 2132714204, 2132520734, 2132326009, 2132130030, 2131932796, 2131734309, 2131534567,
2131333572, 2131131322, 2130927819, 2130723062, 2130517052, 2130309789, 2130101272, 2129891502,
2129680480, 2129468204, 2129254676, 2129039895, 2128823862, 2128606576, 2128388038, 2128168248,
2127947206, 2127724913, 2127501367, 2127276570, 2127050522, 2126823222, 2126594672, 2126364870,
2126133817, 2125901514, 2125667960, 2125433155, 2125197100, 2124959795, 2124721240, 2124481435,
2124240380, 2123998076, 2123754522, 2123509718, 2123263666, 2123016364, 2122767814, 2122518015,
2122266967, 2122014670, 2121761126, 2121506333, 2121250292, 2120993003, 2120734467, 2120474683,
2120213651, 2119951372, 2119687847, 2119423074, 2119157054, 2118889788, 2118621275, 2118351516,
2118080511, 2117808259, 2117534762, 2117260020, 2116984031, 2116706797, 2116428319, 2116148595,
2115867626, 2115585412, 2115301954, 2115017252, 2114731305, 2114444114, 2114155680, 2113866001,
2113575080, 2113282914, 2112989506, 2112694855, 2112398960, 2112101824, 2111803444, 2111503822,
2111202959, 2110900853, 2110597505, 2110292916, 2109987085, 2109680013, 2109371700, 2109062146,
2108751352, 2108439317, 2108126041, 2107811526, 2107495770, 2107178775, 2106860540, 2106541065,
2106220352, 2105898399, 2105575208, 2105250778, 2104925109, 2104598202, 2104270057, 2103940674,
2103610054, 2103278196, 2102945101, 2102610768, 2102275199, 2101938393, 2101600350, 2101261071,
2100920556, 2100578805, 2100235819, 2099891596, 2099546139, 2099199446, 2098851519, 2098502357,
2098151960, 2097800329, 2097447464, 2097093365, 2096738032, 2096381466, 2096023667, 2095664635,
2095304370, 2094942872, 2094580142, 2094216179, 2093850985, 2093484559, 2093116901, 2092748012,
2092377892, 2092006541, 2091633960, 2091260147, 2090885105, 2090508833, 2090131331, 2089752599,
2089372638, 2088991448, 2088609029, 2088225381, 2087840505, 2087454400, 2087067068, 2086678508,
2086288720, 2085897705, 2085505463, 2085111994, 2084717298, 2084321376, 2083924228, 2083525854,
2083126254, 2082725429, 2082323379, 2081920103, 2081515603, 2081109879, 2080702930, 2080294757,
2079885360, 2079474740, 2079062896, 2078649830, 2078235540, 2077820028, 2077403294, 2076985338,
2076566160, 2076145760, 2075724139, 2075301296, 2074877233, 2074451950, 2074025446, 2073597721,
2073168777, 2072738614, 2072307231, 2071874629, 2071440808, 2071005769, 2070569511, 2070132035,
2069693342, 2069253430, 2068812302, 2068369957, 2067926394, 2067481616, 2067035621, 2066588410,
2066139983, 2065690341, 2065239484, 2064787411, 2064334124, 2063879623, 2063423908, 2062966978,
2062508835, 2062049479, 2061588910, 2061127128, 2060664133, 2060199927, 2059734508, 2059267877,
2058800036, 2058330983, 2057860719, 2057389244, 2056916560, 2056442665, 2055967560, 2055491246,
2055013723, 2054534991, 2054055050, 2053573901, 2053091544, 2052607979, 2052123207, 2051637227,
2051150040, 2050661647, 2050172048, 2049681242, 2049189231, 2048696014, 2048201592, 2047705965,
2047209133, 2046711097, 2046211857, 2045711414, 2045209767, 2044706916, 2044202863, 2043697608,
2043191150, 2042683490, 2042174628, 2041664565, 2041153301, 2040640837, 2040127172, 2039612306,
2039096241, 2038578976, 2038060512, 2037540850, 2037019988, 2036497928, 2035974670, 2035450215,
2034924562, 2034397712, 2033869665, 2033340422, 2032809982, 2032278347, 2031745516, 2031211490,
2030676269, 2030139853, 2029602243, 2029063439, 2028523442, 2027982251, 2027439867, 2026896291,
2026351522, 2025805561, 2025258408, 2024710064, 2024160529, 2023609803, 2023057887, 2022504780,
2021950484, 2021394998, 2020838323, 2020280460, 2019721407, 2019161167, 2018599739, 2018037123,
2017473321, 2016908331, 2016342155, 2015774793, 2015206245, 2014636511, 2014065592, 2013493489,
2012920201, 2012345729, 2011770073, 2011193233, 2010615210, 2010036005, 2009455617, 2008874047,
2008291295, 2007707362, 2007122248, 2006535953, 2005948478, 2005359822, 2004769987, 2004178973,
2003586779, 2002993407, 2002398857, 2001803128, 2001206222, 2000608139, 2000008879, 1999408442,
1998806829, 1998204040, 1997600076, 1996994937, 1996388622, 1995781134, 1995172471, 1994562635,
1993951625, 1993339442, 1992726087, 1992111559, 1991495860, 1990878989, 1990260946, 1989641733,
1989021350, 1988399796, 1987777073, 1987153180, 1986528118, 1985901888, 1985274489, 1984645923,
1984016189, 1983385288, 1982753220, 1982119985, 1981485585, 1980850019, 1980213288, 1979575392,
1978936331, 1978296106, 1977654717, 1977012165, 1976368450, 1975723572, 1975077532, 1974430331,
1973781967, 1973132443, 1972481757, 1971829912, 1971176906, 1970522741, 1969867417, 1969210933,
1968553292, 1967894492, 1967234535, 1966573420, 1965911148, 1965247720, 1964583136, 1963917396,
1963250501, 1962582451, 1961913246, 1961242888, 1960571375, 1959898709, 1959224890, 1958549919,
1957873796, 1957196520, 1956518093, 1955838516, 1955157788, 1954475909, 1953792881, 1953108703,
1952423377, 1951736902, 1951049279, 1950360508, 1949670589, 1948979524, 1948287312, 1947593954,
1946899451, 1946203802, 1945507008, 1944809070, 1944109987, 1943409761, 1942708392, 1942005880,
1941302225, 1940597428, 1939891490, 1939184411, 1938476190, 1937766830, 1937056329, 1936344689,
1935631910, 1934917992, 1934202936, 1933486742, 1932769411, 1932050943, 1931331338, 1930610597,
1929888720, 1929165708, 1928441561, 1927716279, 1926989864, 1926262315, 1925533633, 1924803818,
1924072871, 1923340791, 1922607581, 1921873239, 1921137767, 1920401165, 1919663432, 1918924571,
1918184581, 1917443462, 1916701216, 1915957841, 1915213340, 1914467712, 1913720958, 1912973078,
1912224073, 1911473942, 1910722688, 1909970309, 1909216806, 1908462181, 1907706433, 1906949562,
1906191570, 1905432457, 1904672222, 1903910867, 1903148392, 1902384797, 1901620084, 1900854251,
1900087301, 1899319232, 1898550047, 1897779744, 1897008325, 1896235790, 1895462140, 1894687374,
1893911494, 1893134500, 1892356392, 1891577171, 1890796837, 1890015391, 1889232832, 1888449163,
1887664383, 1886878492, 1886091491, 1885303381, 1884514161, 1883723833, 1882932397, 1882139853,
1881346202, 1880551444, 1879755580, 1878958610, 1878160535, 1877361354, 1876561070, 1875759681,
1874957189, 1874153594, 1873348897, 1872543097, 1871736196, 1870928194, 1870119091, 1869308888,
1868497586, 1867685184, 1866871683, 1866057085, 1865241388, 1864424594, 1863606704, 1862787717,
1861967634, 1861146456, 1860324183, 1859500816, 1858676355, 1857850800, 1857024153, 1856196413,
1855367581, 1854537657, 1853706643, 1852874538, 1852041343, 1851207059, 1850371686, 1849535224,
1848697674, 1847859036, 1847019312, 1846178501, 1845336604, 1844493621, 1843649553, 1842804401,
1841958164, 1841110844, 1840262441, 1839412956, 1838562388, 1837710739, 1836858008, 1836004197,
1835149306, 1834293336, 1833436286, 1832578158, 1831718951, 1830858668, 1829997307, 1829134869,
1828271356, 1827406767, 1826541103, 1825674364, 1824806552, 1823937666, 1823067707, 1822196675,
1821324572, 1820451397, 1819577151, 1818701835, 1817825449, 1816947994, 1816069469, 1815189877,
1814309216, 1813427489, 1812544694, 1811660833, 1810775906, 1809889915, 1809002858, 1808114737,
1807225553, 1806335305, 1805443995, 1804551623, 1803658189, 1802763694, 1801868139, 1800971523,
1800073849, 1799175115, 1798275323, 1797374472, 1796472565, 1795569601, 1794665580, 1793760504,
1792854372, 1791947186, 1791038946, 1790129652, 1789219305, 1788307905, 1787395453, 1786481950,
1785567396, 1784651792, 1783735137, 1782817434, 1781898681, 1780978881, 1780058032, 1779136137,
1778213194, 1777289206, 1776364172, 1775438094, 1774510970, 1773582803, 1772653593, 1771723340,
1770792044, 1769859707, 1768926328, 1767991909, 1767056450, 1766119952, 1765182414, 1764243838,
1763304224, 1762363573, 1761421885, 1760479161, 1759535401, 1758590607, 1757644777, 1756697914,
1755750017, 1754801087, 1753851126, 1752900132, 1751948107, 1750995052, 1750040966, 1749085851,
1748129707, 1747172535, 1746214334, 1745255107, 1744294853, 1743333573, 1742371267, 1741407936,
1740443581, 1739478202, 1738511799, 1737544374, 1736575927, 1735606458, 1734635968, 1733664458,
1732691928, 1731718378, 1730743810, 1729768224, 1728791620, 1727813999, 1726835361, 1725855708,
1724875040, 1723893357, 1722910659, 1721926948, 1720942225, 1719956488, 1718969740, 1717981981,
1716993211, 1716003431, 1715012642, 1714020844, 1713028037, 1712034223, 1711039401, 1710043573,
1709046739, 1708048900, 1707050055, 1706050207, 1705049355, 1704047500, 1703044642, 1702040783,
1701035922, 1700030061, 1699023199, 1698015339, 1697006479, 1695996621, 1694985765, 1693973912,
1692961062, 1691947217, 1690932376, 1689916541, 1688899711, 1687881888, 1686863072, 1685843263,
1684822463, 1683800672, 1682777890, 1681754118, 1680729357, 1679703608, 1678676870, 1677649144,
1676620432, 1675590733, 1674560049, 1673528379, 1672495725, 1671462087, 1670427466, 1669391862,
1668355276, 1667317709, 1666279161, 1665239632, 1664199124, 1663157637, 1662115172, 1661071729,
1660027308, 1658981911, 1657935539, 1656888190, 1655839867, 1654790570, 1653740300, 1652689057,
1651636841, 1650583654, 1649529496, 1648474367, 1647418269, 1646361202, 1645303166, 1644244162,
1643184191, 1642123253, 1641061349, 1639998480, 1638934646, 1637869848, 1636804087, 1635737362,
1634669676, 1633601027, 1632531418, 1631460848, 1630389319, 1629316830, 1628243383, 1627168978,
1626093616, 1625017297, 1623940023, 1622861793, 1621782608, 1620702469, 1619621377, 1618539332,
1617456335, 1616372386, 1615287487, 1614201637, 1613114838, 1612027089, 1610938393, 1609848749,
1608758157, 1607666620, 1606574136, 1605480708, 1604386335, 1603291018, 1602194758, 1601097555,
1599999411, 1598900325, 1597800299, 1596699333, 1595597428, 1594494583, 1593390801, 1592286082,
1591180426, 1590073833, 1588966306, 1587857843, 1586748447, 1585638117, 1584526854, 1583414660,
1582301533, 1581187476, 1580072489, 1578956572, 1577839726, 1576721952, 1575603251, 1574483623,
1573363068, 1572241588, 1571119183, 1569995854, 1568871601, 1567746425, 1566620327, 1565493307,
1564365367, 1563236506, 1562106725, 1560976026, 1559844408, 1558711873, 1557578421, 1556444052,
1555308768, 1554172569, 1553035455, 1551897428, 1550758488, 1549618636, 1548477872, 1547336197,
1546193612, 1545050118, 1543905714, 1542760402, 1541614183, 1540467057, 1539319024, 1538170087,
1537020244, 1535869497, 1534717846, 1533565293, 1532411837, 1531257480, 1530102222, 1528946064,
1527789007, 1526631051, 1525472197, 1524312445, 1523151797, 1521990252, 1520827813, 1519664478,
1518500250, 1517335128, 1516169114, 1515002208, 1513834411, 1512665723, 1511496145, 1510325678,
1509154322, 1507982079, 1506808949, 1505634932, 1504460029, 1503284242, 1502107570, 1500930014,
1499751576, 1498572255, 1497392053, 1496210969, 1495029006, 1493846163, 1492662441, 1491477842,
1490292364, 1489106011, 1487918781, 1486730675, 1485541696, 1484351842, 1483161115, 1481969516,
1480777044, 1479583702, 1478389489, 1477194407, 1475998456, 1474801636, 1473603949, 1472405394,
1471205974, 1470005688, 1468804538, 1467602523, 1466399645, 1465195904, 1463991302, 1462785838,
1461579514, 1460372329, 1459164286, 1457955385, 1456745625, 1455535009, 1454323536, 1453111208,
1451898025, 1450683988, 1449469098, 1448253355, 1447036760, 1445819314, 1444601017, 1443381870,
1442161874, 1440941030, 1439719338, 1438496799, 1437273414, 1436049184, 1434824109, 1433598189,
1432371426, 1431143821, 1429915374, 1428686085, 1427455956, 1426224988, 1424993180, 1423760534,
1422527051, 1421292730, 1420057574, 1418821582, 1417584755, 1416347095, 1415108601, 1413869275,
1412629117, 1411388129, 1410146309, 1408903661, 1407660183, 1406415878, 1405170745, 1403924785,
1402678000, 1401430389, 1400181954, 1398932695, 1397682613, 1396431709, 1395179984, 1393927438,
1392674072, 1391419886, 1390164882, 1388909060, 1387652422, 1386394966, 1385136696, 1383877610,
1382617710, 1381356997, 1380095472, 1378833134, 1377569986, 1376306026, 1375041258, 1373775680,
1372509294, 1371242101, 1369974101, 1368705296, 1367435685, 1366165269, 1364894050, 1363622028,
1362349204, 1361075579, 1359801152, 1358525926, 1357249901, 1355973077, 1354695455, 1353417037,
1352137822, 1350857812, 1349577007, 1348295409, 1347013017, 1345729833, 1344445857, 1343161090,
1341875533, 1340589187, 1339302052, 1338014129, 1336725419, 1335435923, 1334145641, 1332854574,
1331562723, 1330270089, 1328976672, 1327682474, 1326387494, 1325091734, 1323795195, 1322497877,
1321199781, 1319900907, 1318601257, 1317300832, 1315999631, 1314697657, 1313394909, 1312091388,
1310787095, 1309482032, 1308176198, 1306869594, 1305562222, 1304254082, 1302945174, 1301635500,
1300325060, 1299013855, 1297701886, 1296389154, 1295075659, 1293761402, 1292446384, 1291130606,
1289814068, 1288496772, 1287178717, 1285859905, 1284540337, 1283220013, 1281898935, 1280577102,
1279254516, 1277931177, 1276607086, 1275282245, 1273956653, 1272630312, 1271303222, 1269975384,
1268646800, 1267317469, 1265987392, 1264656571, 1263325005, 1261992697, 1260659646, 1259325853,
1257991320, 1256656047, 1255320034, 1253983283, 1252645794, 1251307568, 1249968606, 1248628909,
1247288478, 1245947312, 1244605414, 1243262783, 1241919421, 1240575329, 1239230506, 1237884955,
1236538675, 1235191668, 1233843935, 1232495475, 1231146291, 1229796382, 1228445750, 1227094395,
1225742318, 1224389521, 1223036002, 1221681765, 1220326809, 1218971135, 1217614743, 1216257636,
1214899813, 1213541275, 1212182024, 1210822059, 1209461382, 1208099993, 1206737894, 1205375085,
1204011567, 1202647340, 1201282407, 1199916766, 1198550419, 1197183368, 1195815612, 1194447153,
1193077991, 1191708127, 1190337562, 1188966297, 1187594332, 1186221669, 1184848308, 1183474250,
1182099496, 1180724046, 1179347902, 1177971064, 1176593533, 1175215310, 1173836395, 1172456790,
1171076495, 1169695512, 1168313840, 1166931481, 1165548435, 1164164704, 1162780288, 1161395188,
1160009405, 1158622939, 1157235792, 1155847964, 1154459456, 1153070269, 1151680403, 1150289860,
1148898640, 1147506745, 1146114174, 1144720929, 1143327011, 1141932420, 1140537158, 1139141224,
1137744621, 1136347348, 1134949406, 1133550797, 1132151521, 1130751579, 1129350972, 1127949701,
1126547765, 1125145168, 1123741908, 1122337987, 1120933406, 1119528166, 1118122267, 1116715710,
1115308496, 1113900627, 1112492101, 1111082922, 1109673089, 1108262603, 1106851465, 1105439676,
1104027237, 1102614148, 1101200410, 1099786025, 1098370993, 1096955314, 1095538991, 1094122023,
1092704411, 1091286156, 1089867259, 1088447722, 1087027544, 1085606726, 1084185270, 1082763176,
1081340445, 1079917078, 1078493076, 1077068439, 1075643169, 1074217266, 1072790730, 1071363564,
1069935768, 1068507342, 1067078288, 1065648605, 1064218296, 1062787361, 1061355801, 1059923616,
1058490808, 1057057377, 1055623324, 1054188651, 1052753357, 1051317443, 1049880912, 1048443763,
1047005996, 1045567615, 1044128617, 1042689006, 1041248781, 1039807944, 1038366495, 1036924436,
1035481766, 1034038487, 1032594600, 1031150105, 1029705004, 1028259297, 1026812985, 1025366069,
1023918550, 1022470428, 1021021705, 1019572382, 1018122458, 1016671936, 1015220816, 1013769098,
1012316784, 1010863875, 1009410370, 1007956272, 1006501581, 1005046298, 1003590424, 1002133959,
1000676905, 999219262, 997761031, 996302214, 994842810, 993382821, 991922248, 990461091,
988999351, 987537030, 986074127, 984610645, 983146583, 981681943, 980216726, 978750932,
977284562, 975817617, 974350098, 972882006, 971413342, 969944106, 968474300, 967003923,
965532978, 964061465, 962589385, 961116739, 959643527, 958169751, 956695411, 955220508,
953745043, 952269017, 950792431, 949315286, 947837582, 946359321, 944880503, 943401129,
941921200, 940440717, 938959681, 937478092, 935995952, 934513261, 933030021, 931546231,
930061894, 928577010, 927091579, 925605603, 924119082, 922632018, 921144411, 919656262,
918167572, 916678342, 915188572, 913698265, 912207419, 910716038, 909224120, 907731667,
906238681, 904745161, 903251110, 901756526, 900261413, 898765769, 897269597, 895772898,
894275671, 892777918, 891279640, 889780838, 888281512, 886781663, 885281293, 883780402,
882278992, 880777062, 879274614, 877771649, 876268167, 874764170, 873259659, 871754633,
870249095, 868743045, 867236484, 865729413, 864221832, 862713743, 861205147, 859696043,
858186435, 856676321, 855165703, 853654582, 852142959, 850630835, 849118210, 847605086,
846091463, 844577343, 843062726, 841547612, 840032004, 838515901, 836999305, 835482217,
833964638, 832446567, 830928007, 829408958, 827889422, 826369398, 824848888, 823327893,
821806413, 820284450, 818762005, 817239078, 815715670, 814191782, 812667415, 811142571,
809617249, 808091450, 806565177, 805038429, 803511207, 801983513, 800455346, 798926709,
797397602, 795868026, 794337982, 792807470, 791276492, 789745049, 788213141, 786680769,
785147934, 783614638, 782080880, 780546663, 779011986, 777476851, 775941259, 774405210,
772868706, 771331747, 769794334, 768256469, 766718151, 765179382, 763640164, 762100496,
760560380, 759019816, 757478806, 755937350, 754395449, 752853105, 751310318, 749767089,
748223418, 746679308, 745134758, 743589770, 742044345, 740498483, 738952186, 737405453,
735858287, 734310688, 732762657, 731214195, 729665303, 728115982, 726566232, 725016055,
723465451, 721914422, 720362968, 718811090, 717258790, 715706067, 714152924, 712599360,
711045377, 709490976, 707936158, 706380923, 704825272, 703269207, 701712728, 700155836,
698598533, 697040818, 695482694, 693924160, 692365218, 690805869, 689246113, 687685952,
686125387, 684564417, 683003045, 681441272, 679879097, 678316522, 676753549, 675190177,
673626408, 672062243, 670497682, 668932727, 667367379, 665801638, 664235505, 662668981,
661102068, 659534766, 657967075, 656398998, 654830535, 653261686, 651692453, 650122837,
648552838, 646982457, 645411696, 643840556, 642269036, 640697139, 639124865, 637552215,
635979190, 634405791, 632832018, 631257873, 629683357, 628108471, 626533215, 624957590,
623381598, 621805239, 620228514, 618651424, 617073971, 615496154, 613917975, 612339436,
610760536, 609181276, 607601658, 606021683, 604441352, 602860664, 601279623, 599698227,
598116479, 596534378, 594951927, 593369126, 591785976, 590202477, 588618632, 587034440,
585449903, 583865021, 582279796, 580694229, 579108320, 577522070, 575935480, 574348552,
572761285, 571173682, 569585743, 567997469, 566408860, 564819919, 563230645, 561641039,
560051104, 558460839, 556870245, 555279324, 553688076, 552096502, 550504604, 548912382,
547319836, 545726969, 544133781, 542540273, 540946445, 539352300, 537757837, 536163058,
534567963, 532972554, 531376831, 529780796, 528184449, 526587791, 524990824, 523393547,
521795963, 520198072, 518599875, 517001373, 515402566, 513803457, 512204045, 510604332,
509004318, 507404005, 505803394, 504202485, 502601279, 500999778, 499397982, 497795892,
496193509, 494590835, 492987869, 491384614, 489781069, 488177236, 486573117, 484968710,
483364019, 481759043, 480153784, 478548243, 476942419, 475336316, 473729932, 472123270,
470516330, 468909114, 467301622, 465693854, 464085813, 462477499, 460868912, 459260055,
457650927, 456041530, 454431865, 452821933, 451211734, 449601270, 447990541, 446379549,
444768294, 443156777, 441545000, 439932963, 438320667, 436708113, 435095303, 433482236,
431868915, 430255339, 428641511, 427027430, 425413098, 423798515, 422183684, 420568604,
418953276, 417337703, 415721883, 414105819, 412489512, 410872962, 409256170, 407639137,
406021865, 404404353, 402786604, 401168618, 399550396, 397931939, 396313247, 394694323,
393075166, 391455778, 389836160, 388216313, 386596237, 384975934, 383355404, 381734649,
380113669, 378492466, 376871039, 375249392, 373627523, 372005435, 370383128, 368760603,
367137861, 365514903, 363891730, 362268343, 360644742, 359020930, 357396906, 355772673,
354148230, 352523578, 350898719, 349273654, 347648383, 346022908, 344397230, 342771348,
341145265, 339518981, 337892498, 336265816, 334638936, 333011859, 331384586, 329757119,
328129457, 326501602, 324873555, 323245317, 321616889, 319988272, 318359466, 316730474,
315101295, 313471930, 311842381, 310212649, 308582734, 306952638, 305322361, 303691904,
302061269, 300430456, 298799466, 297168301, 295536961, 293905447, 292273760, 290641901,
289009871, 287377671, 285745302, 284112765, 282480061, 280847190, 279214155, 277580955,
275947592, 274314066, 272680379, 271046532, 269412525, 267778360, 266144038, 264509558,
262874923, 261240134, 259605191, 257970095, 256334847, 254699448, 253063900, 251428203,
249792358, 248156366, 246520228, 244883945, 243247518, 241610947, 239974235, 238337382,
236700388, 235063255, 233425984, 231788575, 230151030, 228513350, 226875535, 225237587,
223599506, 221961294, 220322951, 218684479, 217045878, 215407149, 213768293, 212129312,
210490206, 208850976, 207211624, 205572149, 203932553, 202292838, 200653003, 199013051,
197372981, 195732795, 194092495, 192452080, 190811551, 189170911, 187530159, 185889297,
184248325, 182607245, 180966058, 179324764, 177683365, 176041861, 174400254, 172758544,
171116733, 169474820, 167832808, 166190698, 164548489, 162906184, 161263783, 159621287,
157978697, 156336015, 154693240, 153050374, 151407418, 149764374, 148121241, 146478021,
144834714, 143191323, 141547847, 139904288, 138260647, 136616925, 134973122, 133329239,
131685278, 130041240, 128397125, 126752935, 125108670, 123464332, 121819921, 120175438,
118530885, 116886262, 115241570, 113596810, 111951983, 110307091, 108662134, 107017112,
105372028, 103726882, 102081675, 100436408, 98791081, 97145697, 95500255, 93854758,
92209205, 90563597, 88917937, 87272224, 85626460, 83980645, 82334782, 80688869,
79042909, 77396903, 75750851, 74104755, 72458615, 70812432, 69166208, 67519943,
65873638, 64227295, 62580914, 60934496, 59288042, 57641553, 55995030, 54348475,
52701887, 51055268, 49408620, 47761942, 46115236, 44468503, 42821744, 41174960,
39528151, 37881320, 36234466, 34587590, 32940695, 31293780, 29646846, 27999895,
26352928, 24705945, 23058947, 21411936, 19764913, 18117878, 16470832, 14823776,
13176712, 11529640, 9882561, 8235476, 6588387, 4941294, 3294197, 1647099
};
/*
 * Storage for the FFT offsets lookup table.
 *
 * 21845 is exactly the number of entries ff_fft_lut_init() writes when it is
 * called with size == 1 << 17: the entry count obeys
 * T(size) = T(size/2) + 2*T(size/4) with T(size < 16) = 1, giving
 * T(131072) = 21845.
 * NOTE(review): presumably this is the array passed as "table" to
 * ff_fft_lut_init(); the caller is not visible here -- confirm.
 */
uint16_t ff_fft_offsets_lut[21845];
/**
 * Recursively append offsets to a lookup table.
 *
 * A range of fewer than 16 elements is a leaf and contributes one entry,
 * off >> 2, stored at table[*index]. Any larger range is split into its
 * first half, its third quarter and its fourth quarter, visited in that
 * order.
 *
 * No bounds checking is done; the caller must supply a table large enough
 * for every entry produced by the given size.
 *
 * @param table destination array
 * @param off   offset of the first element of the current range
 * @param size  number of elements in the current range
 * @param index in/out: next free slot in table; advanced by one per entry
 */
void ff_fft_lut_init(uint16_t *table, int off, int size, int *index)
{
    int half, quarter;

    /* Leaf: emit a single entry and stop recursing. */
    if (size < 16) {
        table[(*index)++] = off >> 2;
        return;
    }

    half    = size >> 1;
    quarter = size >> 2;

    ff_fft_lut_init(table, off,               half,    index);
    ff_fft_lut_init(table, off + half,        quarter, index);
    ff_fft_lut_init(table, off + 3 * quarter, quarter, index);
}

View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2012
* MIPS Technologies, Inc., California.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Authors: Stanislav Ocovaj (socovaj@mips.com)
* Goran Cordasic (goran@mips.com)
* Djordje Pesut (djordje@mips.com)
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* definitions and tables for FFT
*/
/* Include guard. */
#ifndef AVCODEC_FFT_TABLE_H
#define AVCODEC_FFT_TABLE_H
#include "libavcodec/fft.h"
/* log2 of the largest supported transform length. */
#define MAX_LOG2_NFFT 17 //!< Specifies maximum allowed fft size
/* Largest supported transform length: 1 << 17 = 131072. */
#define MAX_FFT_SIZE (1 << MAX_LOG2_NFFT)
/* Constant int32_t table; its definition has MAX_FFT_SIZE/(4*16) entries. */
extern const int32_t ff_w_tab_sr[];
/* Offsets lookup table; its definition has 21845 entries. */
extern uint16_t ff_fft_offsets_lut[];
/*
 * Recursively append offsets to table, starting at slot *index and advancing
 * *index by one per entry written. off and size describe the range being
 * split. No bounds checking is performed on table.
 */
void ff_fft_lut_init(uint16_t *table, int off, int size, int *index);
#endif /* AVCODEC_FFT_TABLE_H */

View File

@ -0,0 +1,321 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/common.h"
#include "avcodec.h"
#include "dct.h"
#include "faanidct.h"
#include "idctdsp.h"
#include "simple_idct.h"
#include "xvididct.h"
/**
 * Fill a ScanTable from a source scan order and an IDCT permutation.
 *
 * After the call:
 *  - st->scantable points at src_scantable (the pointer is stored, the data
 *    is not copied);
 *  - st->permutated[i] is permutation[src_scantable[i]];
 *  - st->raster_end[i] is the largest st->permutated[] value found at
 *    positions 0..i.
 *
 * @param permutation   permutation table, indexed by the values held in
 *                      src_scantable
 * @param st            table to initialise
 * @param src_scantable scan order; the first 64 entries are read
 */
av_cold void ff_init_scantable(uint8_t *permutation, ScanTable *st,
                               const uint8_t *src_scantable)
{
    int pos;
    int highest = -1;

    st->scantable = src_scantable;

    /* Map every scan position through the permutation. */
    for (pos = 0; pos < 64; pos++)
        st->permutated[pos] = permutation[src_scantable[pos]];

    /* Running maximum of the permuted positions. */
    for (pos = 0; pos < 64; pos++) {
        const int cur = st->permutated[pos];

        highest = cur > highest ? cur : highest;
        st->raster_end[pos] = highest;
    }
}
/**
 * Fill a 64-entry coefficient permutation table for the given type.
 *
 * With a 6-bit index i (bits 5..3 and bits 2..0):
 *  - FF_IDCT_PERM_NONE:      identity;
 *  - FF_IDCT_PERM_LIBMPEG2:  bits 5..3 kept; within the low three bits,
 *                            bits 2..1 move down to 1..0 and bit 0 moves up
 *                            to bit 2;
 *  - FF_IDCT_PERM_TRANSPOSE: the two 3-bit halves are swapped;
 *  - FF_IDCT_PERM_PARTTRANS: bits 5 and 2 kept, bits 1..0 swapped with
 *                            bits 4..3.
 *
 * Any other type logs an error and leaves the table untouched.
 *
 * @param idct_permutation destination, 64 entries
 * @param perm_type        permutation to generate
 */
av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation,
                                           enum idct_permutation_type perm_type)
{
    int n;

    /* On x86 the arch-specific routine is tried first; a non-zero return
     * ends the function here (presumably it filled the table itself). */
    if (ARCH_X86) {
        if (ff_init_scantable_permutation_x86(idct_permutation, perm_type))
            return;
    }

    switch (perm_type) {
    case FF_IDCT_PERM_NONE:
        for (n = 0; n < 64; n++) {
            idct_permutation[n] = n;
        }
        break;
    case FF_IDCT_PERM_LIBMPEG2:
        for (n = 0; n < 64; n++) {
            idct_permutation[n] = (n & 0x38) | ((n & 6) >> 1) | ((n & 1) << 2);
        }
        break;
    case FF_IDCT_PERM_TRANSPOSE:
        for (n = 0; n < 64; n++) {
            idct_permutation[n] = ((n & 7) << 3) | (n >> 3);
        }
        break;
    case FF_IDCT_PERM_PARTTRANS:
        for (n = 0; n < 64; n++) {
            idct_permutation[n] = (n & 0x24) | ((n & 3) << 3) | ((n >> 3) & 3);
        }
        break;
    default:
        av_log(NULL, AV_LOG_ERROR,
               "Internal error, IDCT permutation not set\n");
    }
}
/**
 * Store an 8x8 block of coefficients as pixels.
 *
 * Every coefficient is passed through av_clip_uint8() and written to the
 * destination; the previous destination contents are overwritten.
 *
 * @param block     64 coefficients, read as 8 consecutive rows of 8
 * @param pixels    destination; 8 bytes are written per row
 * @param line_size distance in bytes between the starts of successive
 *                  destination rows
 */
void ff_put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
                             ptrdiff_t line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = av_clip_uint8(block[col]);

        block  += 8;
        pixels += line_size;
    }
}
/**
 * Store the top-left 4x4 corner of a coefficient block as pixels.
 *
 * Four values are read from each of the first four rows of block (rows are
 * 8 coefficients apart), passed through av_clip_uint8() and written to the
 * destination.
 *
 * line_size is a ptrdiff_t, like the stride of ff_put_pixels_clamped_c() and
 * like the ptrdiff_t stride the ff_jref_idct4_put() caller hands over, so
 * the stride is no longer narrowed to int at this call boundary.
 *
 * @param block     coefficients, row pitch of 8
 * @param pixels    destination; 4 bytes are written per row
 * @param line_size distance in bytes between the starts of successive
 *                  destination rows
 */
static void put_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
                                  ptrdiff_t line_size)
{
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++)
            pixels[col] = av_clip_uint8(block[col]);

        block  += 8;
        pixels += line_size;
    }
}
/**
 * Store the top-left 2x2 corner of a coefficient block as pixels.
 *
 * Two values are read from each of the first two rows of block (rows are
 * 8 coefficients apart), passed through av_clip_uint8() and written to the
 * destination.
 *
 * line_size is a ptrdiff_t, like the stride of ff_put_pixels_clamped_c() and
 * like the ptrdiff_t stride the ff_jref_idct2_put() caller hands over, so
 * the stride is no longer narrowed to int at this call boundary.
 *
 * @param block     coefficients, row pitch of 8
 * @param pixels    destination; 2 bytes are written per row
 * @param line_size distance in bytes between the starts of successive
 *                  destination rows
 */
static void put_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
                                  ptrdiff_t line_size)
{
    int row, col;

    for (row = 0; row < 2; row++) {
        for (col = 0; col < 2; col++)
            pixels[col] = av_clip_uint8(block[col]);

        block  += 8;
        pixels += line_size;
    }
}
/**
 * Store an 8x8 block of signed coefficients as unsigned pixels.
 *
 * Each coefficient is biased by +128 and saturated: values below -128
 * become 0, values above 127 become 255, everything else becomes
 * coefficient + 128.
 *
 * @param block     64 coefficients, read as 8 consecutive rows of 8
 * @param pixels    destination; 8 bytes are written per row
 * @param line_size distance in bytes between the starts of successive
 *                  destination rows
 */
static void put_signed_pixels_clamped_c(const int16_t *block,
                                        uint8_t *av_restrict pixels,
                                        ptrdiff_t line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++) {
            const int v = block[col];

            pixels[col] = v < -128 ? 0
                        : v >  127 ? 255
                        : (uint8_t) (v + 128);
        }
        block  += 8;
        pixels += line_size;
    }
}
/**
 * Add an 8x8 block of coefficients to existing pixels.
 *
 * For every position, the current destination byte and the matching
 * coefficient are summed, the sum is passed through av_clip_uint8() and the
 * result is written back.
 *
 * @param block     64 coefficients, read as 8 consecutive rows of 8
 * @param pixels    destination, updated in place; 8 bytes per row
 * @param line_size distance in bytes between the starts of successive
 *                  destination rows
 */
void ff_add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
                             ptrdiff_t line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);

        block  += 8;
        pixels += line_size;
    }
}
/**
 * Add the top-left 4x4 corner of a coefficient block to existing pixels.
 *
 * For each of the first four rows of block (rows are 8 coefficients apart),
 * four coefficients are added to the matching destination bytes; each sum
 * is passed through av_clip_uint8() and written back.
 *
 * line_size is a ptrdiff_t, like the stride of ff_add_pixels_clamped_c() and
 * like the ptrdiff_t stride the ff_jref_idct4_add() caller hands over, so
 * the stride is no longer narrowed to int at this call boundary.
 *
 * @param block     coefficients, row pitch of 8
 * @param pixels    destination, updated in place; 4 bytes per row
 * @param line_size distance in bytes between the starts of successive
 *                  destination rows
 */
static void add_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
                                  ptrdiff_t line_size)
{
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++)
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);

        block  += 8;
        pixels += line_size;
    }
}
/**
 * Add the top-left 2x2 corner of a coefficient block to existing pixels.
 *
 * For each of the first two rows of block (rows are 8 coefficients apart),
 * two coefficients are added to the matching destination bytes; each sum is
 * passed through av_clip_uint8() and written back.
 *
 * line_size is a ptrdiff_t, like the stride of ff_add_pixels_clamped_c() and
 * like the ptrdiff_t stride the ff_jref_idct2_add() caller hands over, so
 * the stride is no longer narrowed to int at this call boundary.
 *
 * @param block     coefficients, row pitch of 8
 * @param pixels    destination, updated in place; 2 bytes per row
 * @param line_size distance in bytes between the starts of successive
 *                  destination rows
 */
static void add_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
                                  ptrdiff_t line_size)
{
    int row, col;

    for (row = 0; row < 2; row++) {
        for (col = 0; col < 2; col++)
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);

        block  += 8;
        pixels += line_size;
    }
}
/*
 * "put" entry point selected by ff_idctdsp_init() when avctx->lowres == 1:
 * calls ff_j_rev_dct4() on block, then stores the 4x4 result into dest with
 * put_pixels_clamped4_c(), overwriting the destination.
 * NOTE(review): ff_j_rev_dct4() presumably transforms block in place; its
 * definition is not visible here.
 */
static void ff_jref_idct4_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
ff_j_rev_dct4 (block);
put_pixels_clamped4_c(block, dest, line_size);
}
/* idct_add for lowres==1: run ff_j_rev_dct4() on block, then add the
 * result onto the pixels already in dest with add_pixels_clamped4_c(). */
static void ff_jref_idct4_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
ff_j_rev_dct4 (block);
add_pixels_clamped4_c(block, dest, line_size);
}
/* idct_put for lowres==2: run ff_j_rev_dct2() on block, then store the
 * result into dest with put_pixels_clamped2_c(), overwriting dest. */
static void ff_jref_idct2_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
ff_j_rev_dct2 (block);
put_pixels_clamped2_c(block, dest, line_size);
}
/* idct_add for lowres==2: run ff_j_rev_dct2() on block, then add the
 * result onto the pixels already in dest with add_pixels_clamped2_c(). */
static void ff_jref_idct2_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
ff_j_rev_dct2 (block);
add_pixels_clamped2_c(block, dest, line_size);
}
/* idct_put for lowres==3: only block[0] is used. It is rounded and scaled
 * down by 8 ((x + 4) >> 3), passed through av_clip_uint8() and written to
 * dest[0]. line_size is unused because a single pixel is produced. */
static void ff_jref_idct1_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
dest[0] = av_clip_uint8((block[0] + 4)>>3);
}
/* idct_add for lowres==3: only block[0] is used. It is rounded and scaled
 * down by 8 ((x + 4) >> 3), added to the current dest[0], and the sum is
 * passed through av_clip_uint8(). line_size is unused. */
static void ff_jref_idct1_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
}
/**
 * Initialize an IDCTDSPContext.
 *
 * Chooses the idct / idct_put / idct_add implementations and the coefficient
 * permutation type from the codec context, installs the C pixel helpers,
 * calls the optional Xvid and per-architecture initializers, and finally
 * fills c->idct_permutation from the selected c->perm_type.
 *
 * @param c     context to fill; c->mpeg4_studio_profile is read here, so the
 *              caller has to set it before calling
 * @param avctx codec context; lowres, bits_per_raw_sample and idct_algo are read
 */
av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
{
/* Only forwarded to the architecture-specific initializers below. */
const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
/* lowres 1, 2 and 3 select the idct4, idct2 and idct1 variants. */
if (avctx->lowres==1) {
c->idct_put = ff_jref_idct4_put;
c->idct_add = ff_jref_idct4_add;
c->idct = ff_j_rev_dct4;
c->perm_type = FF_IDCT_PERM_NONE;
} else if (avctx->lowres==2) {
c->idct_put = ff_jref_idct2_put;
c->idct_add = ff_jref_idct2_add;
c->idct = ff_j_rev_dct2;
c->perm_type = FF_IDCT_PERM_NONE;
} else if (avctx->lowres==3) {
c->idct_put = ff_jref_idct1_put;
c->idct_add = ff_jref_idct1_add;
c->idct = ff_j_rev_dct1;
c->perm_type = FF_IDCT_PERM_NONE;
} else {
/* Any other lowres value: choose by sample bit depth first. */
if (avctx->bits_per_raw_sample == 10 || avctx->bits_per_raw_sample == 9) {
/* 10-bit MPEG-4 Simple Studio Profile requires a higher precision IDCT
However, it only uses idct_put */
if (c->mpeg4_studio_profile) {
c->idct_put = ff_simple_idct_put_int32_10bit;
c->idct_add = NULL;
c->idct = NULL;
} else {
c->idct_put = ff_simple_idct_put_int16_10bit;
c->idct_add = ff_simple_idct_add_int16_10bit;
c->idct = ff_simple_idct_int16_10bit;
}
c->perm_type = FF_IDCT_PERM_NONE;
} else if (avctx->bits_per_raw_sample == 12) {
c->idct_put = ff_simple_idct_put_int16_12bit;
c->idct_add = ff_simple_idct_add_int16_12bit;
c->idct = ff_simple_idct_int16_12bit;
c->perm_type = FF_IDCT_PERM_NONE;
} else {
/* Remaining bit depths: choose by the requested IDCT algorithm. */
if (avctx->idct_algo == FF_IDCT_INT) {
c->idct_put = ff_jref_idct_put;
c->idct_add = ff_jref_idct_add;
c->idct = ff_j_rev_dct;
c->perm_type = FF_IDCT_PERM_LIBMPEG2;
#if CONFIG_FAANIDCT
} else if (avctx->idct_algo == FF_IDCT_FAAN) {
c->idct_put = ff_faanidct_put;
c->idct_add = ff_faanidct_add;
c->idct = ff_faanidct;
c->perm_type = FF_IDCT_PERM_NONE;
#endif /* CONFIG_FAANIDCT */
} else { // accurate/default
/* Be sure FF_IDCT_NONE will select this one, since it uses FF_IDCT_PERM_NONE */
c->idct_put = ff_simple_idct_put_int16_8bit;
c->idct_add = ff_simple_idct_add_int16_8bit;
c->idct = ff_simple_idct_int16_8bit;
c->perm_type = FF_IDCT_PERM_NONE;
}
}
}
/* C pixel helpers, set regardless of the IDCT chosen above. */
c->put_pixels_clamped = ff_put_pixels_clamped_c;
c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
c->add_pixels_clamped = ff_add_pixels_clamped_c;
/* The initializers below receive c and run after the defaults are in place;
 * presumably they replace entries with specialised versions - confirm in
 * the respective init functions. */
if (CONFIG_MPEG4_DECODER && avctx->idct_algo == FF_IDCT_XVID)
ff_xvid_idct_init(c, avctx);
if (ARCH_AARCH64)
ff_idctdsp_init_aarch64(c, avctx, high_bit_depth);
if (ARCH_ALPHA)
ff_idctdsp_init_alpha(c, avctx, high_bit_depth);
if (ARCH_ARM)
ff_idctdsp_init_arm(c, avctx, high_bit_depth);
if (ARCH_PPC)
ff_idctdsp_init_ppc(c, avctx, high_bit_depth);
if (ARCH_X86)
ff_idctdsp_init_x86(c, avctx, high_bit_depth);
if (ARCH_MIPS)
ff_idctdsp_init_mips(c, avctx, high_bit_depth);
/* Must come last: uses whatever perm_type is set after all the calls above. */
ff_init_scantable_permutation(c->idct_permutation,
c->perm_type);
}

View File

@ -0,0 +1,332 @@
/*
* This file is part of the Independent JPEG Group's software.
*
* The authors make NO WARRANTY or representation, either express or implied,
* with respect to this software, its quality, accuracy, merchantability, or
* fitness for a particular purpose. This software is provided "AS IS", and
* you, its user, assume the entire risk as to its quality and accuracy.
*
* This software is copyright (C) 1994-1996, Thomas G. Lane.
* All Rights Reserved except as specified below.
*
* Permission is hereby granted to use, copy, modify, and distribute this
* software (or portions thereof) for any purpose, without fee, subject to
* these conditions:
* (1) If any part of the source code for this software is distributed, then
* this README file must be included, with this copyright and no-warranty
* notice unaltered; and any additions, deletions, or changes to the original
* files must be clearly indicated in accompanying documentation.
* (2) If only executable code is distributed, then the accompanying
* documentation must state that "this software is based in part on the work
* of the Independent JPEG Group".
* (3) Permission for use of this software is granted only if the user accepts
* full responsibility for any undesirable consequences; the authors accept
* NO LIABILITY for damages of any kind.
*
* These conditions apply to any software derived from or based on the IJG
* code, not just to the unmodified library. If you use our work, you ought
* to acknowledge us.
*
* Permission is NOT granted for the use of any IJG author's name or company
* name in advertising or publicity relating to this software or products
* derived from it. This software may be referred to only as "the Independent
* JPEG Group's software".
*
* We specifically permit and encourage the use of this software as the basis
* of commercial products, provided that all warranty or liability claims are
* assumed by the product vendor.
*
* This file contains a fast, not so accurate integer implementation of the
* forward DCT (Discrete Cosine Transform).
*
* A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
* on each column. Direct algorithms are also available, but they are
* much more complex and seem not to be any faster when reduced to code.
*
* This implementation is based on Arai, Agui, and Nakajima's algorithm for
* scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
* Japanese, but the algorithm is described in the Pennebaker & Mitchell
* JPEG textbook (see REFERENCES section in file README). The following code
* is based directly on figure 4-8 in P&M.
* While an 8-point DCT cannot be done in less than 11 multiplies, it is
* possible to arrange the computation so that many of the multiplies are
* simple scalings of the final outputs. These multiplies can then be
* folded into the multiplications or divisions by the JPEG quantization
* table entries. The AA&N method leaves only 5 multiplies and 29 adds
* to be done in the DCT itself.
* The primary disadvantage of this method is that with fixed-point math,
* accuracy is lost due to imprecise representation of the scaled
* quantization values. The smaller the quantization table entry, the less
* precise the scaled value, so this implementation does worse with high-
* quality-setting files than with low-quality ones.
*/
/**
* @file
* Independent JPEG Group's fast AAN dct.
*/
#include <stdlib.h>
#include <stdio.h>
#include "libavutil/common.h"
#include "dct.h"
/* Number of samples per row and per column of a block. */
#define DCTSIZE 8
/* Wraps the return type of externally visible functions; expands to its argument unchanged. */
#define GLOBAL(x) x
/* Plain >> on the operand, with no rounding term added. */
#define RIGHT_SHIFT(x, n) ((x) >> (n))
/*
* This module is specialized to the case DCTSIZE = 8.
*/
#if DCTSIZE != 8
Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
#endif
/* Scaling decisions are generally the same as in the LL&M algorithm;
* see jfdctint.c for more details. However, we choose to descale
* (right shift) multiplication products as soon as they are formed,
* rather than carrying additional fractional bits into subsequent additions.
* This compromises accuracy slightly, but it lets us save a few shifts.
* More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
* everywhere except in the multiplications proper; this saves a good deal
* of work on 16-bit-int machines.
*
* Again to save a few shifts, the intermediate results between pass 1 and
* pass 2 are not upscaled, but are represented only to integral precision.
*
* A final compromise is to represent the multiplicative constants to only
* 8 fractional bits, rather than 13. This saves some shifting work on some
* machines, and may also reduce the cost of multiplication (since there
* are fewer one-bits in the constants).
*/
#define CONST_BITS 8
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
* causing a lot of useless floating-point operations at run time.
* To get around this we use the following pre-calculated constants.
* If you change CONST_BITS you may want to add appropriate values.
* (With a reasonable C compiler, you can just rely on the FIX() macro...)
*/
#if CONST_BITS == 8
#define FIX_0_382683433 ((int32_t) 98) /* FIX(0.382683433) */
#define FIX_0_541196100 ((int32_t) 139) /* FIX(0.541196100) */
#define FIX_0_707106781 ((int32_t) 181) /* FIX(0.707106781) */
#define FIX_1_306562965 ((int32_t) 334) /* FIX(1.306562965) */
#else
#define FIX_0_382683433 FIX(0.382683433)
#define FIX_0_541196100 FIX(0.541196100)
#define FIX_0_707106781 FIX(0.707106781)
#define FIX_1_306562965 FIX(1.306562965)
#endif
/* We can gain a little more speed, with a further compromise in accuracy,
* by omitting the addition in a descaling shift. This yields an incorrectly
* rounded result half the time...
*/
#ifndef USE_ACCURATE_ROUNDING
#undef DESCALE
#define DESCALE(x,n) RIGHT_SHIFT(x, n)
#endif
/* Multiply an int16_t variable by an int32_t constant, and immediately
* descale to yield an int16_t result.
*/
#define MULTIPLY(var,const) ((int16_t) DESCALE((var) * (const), CONST_BITS))
/*
 * Pass 1 of the fast forward DCT: transform each of the DCTSIZE rows of
 * the block in place with the 1-D butterfly.
 *
 * data points to DCTSIZE*DCTSIZE samples stored row after row. The tmp
 * variables are reused between the even and the odd part, so the
 * statement order inside the loop matters.
 */
static av_always_inline void row_fdct(int16_t * data){
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int tmp10, tmp11, tmp12, tmp13;
int z1, z2, z3, z4, z5, z11, z13;
int16_t *dataptr;
int ctr;
/* Pass 1: process rows. */
dataptr = data;
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
/* First stage: sums and differences of mirrored samples (0/7, 1/6, 2/5, 3/4). */
tmp0 = dataptr[0] + dataptr[7];
tmp7 = dataptr[0] - dataptr[7];
tmp1 = dataptr[1] + dataptr[6];
tmp6 = dataptr[1] - dataptr[6];
tmp2 = dataptr[2] + dataptr[5];
tmp5 = dataptr[2] - dataptr[5];
tmp3 = dataptr[3] + dataptr[4];
tmp4 = dataptr[3] - dataptr[4];
/* Even part */
tmp10 = tmp0 + tmp3; /* phase 2 */
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
dataptr[0] = tmp10 + tmp11; /* phase 3 */
dataptr[4] = tmp10 - tmp11;
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
dataptr[2] = tmp13 + z1; /* phase 5 */
dataptr[6] = tmp13 - z1;
/* Odd part */
/* tmp10..tmp12 are reused here; the even-part values are no longer needed. */
tmp10 = tmp4 + tmp5; /* phase 2 */
tmp11 = tmp5 + tmp6;
tmp12 = tmp6 + tmp7;
/* The rotator is modified from fig 4-8 to avoid extra negations. */
z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
z11 = tmp7 + z3; /* phase 5 */
z13 = tmp7 - z3;
dataptr[5] = z13 + z2; /* phase 6 */
dataptr[3] = z13 - z2;
dataptr[1] = z11 + z4;
dataptr[7] = z11 - z4;
dataptr += DCTSIZE; /* advance pointer to next row */
}
}
/*
 * Perform the forward DCT on one block of samples.
 *
 * data holds DCTSIZE*DCTSIZE samples stored row after row and is
 * transformed in place: row_fdct() handles the rows, then the same 1-D
 * butterfly is applied to every column. As described in the file header,
 * the outputs are left with the AA&N scale factors, which are meant to be
 * folded into quantization.
 */
GLOBAL(void)
ff_fdct_ifast (int16_t * data)
{
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int tmp10, tmp11, tmp12, tmp13;
int z1, z2, z3, z4, z5, z11, z13;
int16_t *dataptr;
int ctr;
row_fdct(data);
/* Pass 2: process columns. */
dataptr = data;
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
/* Column elements are DCTSIZE entries apart. */
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
/* Even part */
tmp10 = tmp0 + tmp3; /* phase 2 */
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
dataptr[DCTSIZE*4] = tmp10 - tmp11;
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
dataptr[DCTSIZE*6] = tmp13 - z1;
/* Odd part */
/* tmp10..tmp12 are reused here; the even-part values are no longer needed. */
tmp10 = tmp4 + tmp5; /* phase 2 */
tmp11 = tmp5 + tmp6;
tmp12 = tmp6 + tmp7;
/* The rotator is modified from fig 4-8 to avoid extra negations. */
z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
z11 = tmp7 + z3; /* phase 5 */
z13 = tmp7 - z3;
dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
dataptr[DCTSIZE*3] = z13 - z2;
dataptr[DCTSIZE*1] = z11 + z4;
dataptr[DCTSIZE*7] = z11 - z4;
dataptr++; /* advance pointer to next column */
}
}
/*
 * Perform the forward 2-4-8 DCT on one block of samples.
 *
 * data holds DCTSIZE*DCTSIZE samples stored row after row and is
 * transformed in place. The rows get the regular 1-D transform via
 * row_fdct(). In the column pass the 4-point even-part butterfly is run
 * twice per column: once on the sums of adjacent row pairs (results go to
 * rows 0, 4, 2, 6) and once on their differences (results go to rows
 * 1, 5, 3, 7).
 */
GLOBAL(void)
ff_fdct_ifast248 (int16_t * data)
{
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int tmp10, tmp11, tmp12, tmp13;
int z1;
int16_t *dataptr;
int ctr;
row_fdct(data);
/* Pass 2: process columns. */
dataptr = data;
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
/* Sums of adjacent row pairs (0/1, 2/3, 4/5, 6/7)... */
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
/* ...and their differences. */
tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
/* Even part */
tmp10 = tmp0 + tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
tmp13 = tmp0 - tmp3;
dataptr[DCTSIZE*0] = tmp10 + tmp11;
dataptr[DCTSIZE*4] = tmp10 - tmp11;
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
dataptr[DCTSIZE*2] = tmp13 + z1;
dataptr[DCTSIZE*6] = tmp13 - z1;
/* Same butterfly again, this time on the differences. */
tmp10 = tmp4 + tmp7;
tmp11 = tmp5 + tmp6;
tmp12 = tmp5 - tmp6;
tmp13 = tmp4 - tmp7;
dataptr[DCTSIZE*1] = tmp10 + tmp11;
dataptr[DCTSIZE*5] = tmp10 - tmp11;
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
dataptr[DCTSIZE*3] = tmp13 + z1;
dataptr[DCTSIZE*7] = tmp13 - z1;
dataptr++; /* advance pointer to next column */
}
}
#undef GLOBAL
#undef CONST_BITS
#undef DESCALE
#undef FIX_0_541196100
#undef FIX_1_306562965

View File

@ -0,0 +1,25 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Compile the template twice, once per BIT_DEPTH value. The function names
 * in the template go through FUNC(), which presumably makes them distinct
 * for each depth - see bit_depth_template.c to confirm. */
/* BIT_DEPTH 8 instantiation */
#define BIT_DEPTH 8
#include "jfdctint_template.c"
#undef BIT_DEPTH
/* BIT_DEPTH 10 instantiation */
#define BIT_DEPTH 10
#include "jfdctint_template.c"
#undef BIT_DEPTH

View File

@ -0,0 +1,398 @@
/*
* This file is part of the Independent JPEG Group's software.
*
* The authors make NO WARRANTY or representation, either express or implied,
* with respect to this software, its quality, accuracy, merchantability, or
* fitness for a particular purpose. This software is provided "AS IS", and
* you, its user, assume the entire risk as to its quality and accuracy.
*
* This software is copyright (C) 1991-1996, Thomas G. Lane.
* All Rights Reserved except as specified below.
*
* Permission is hereby granted to use, copy, modify, and distribute this
* software (or portions thereof) for any purpose, without fee, subject to
* these conditions:
* (1) If any part of the source code for this software is distributed, then
* this README file must be included, with this copyright and no-warranty
* notice unaltered; and any additions, deletions, or changes to the original
* files must be clearly indicated in accompanying documentation.
* (2) If only executable code is distributed, then the accompanying
* documentation must state that "this software is based in part on the work
* of the Independent JPEG Group".
* (3) Permission for use of this software is granted only if the user accepts
* full responsibility for any undesirable consequences; the authors accept
* NO LIABILITY for damages of any kind.
*
* These conditions apply to any software derived from or based on the IJG
* code, not just to the unmodified library. If you use our work, you ought
* to acknowledge us.
*
* Permission is NOT granted for the use of any IJG author's name or company
* name in advertising or publicity relating to this software or products
* derived from it. This software may be referred to only as "the Independent
* JPEG Group's software".
*
* We specifically permit and encourage the use of this software as the basis
* of commercial products, provided that all warranty or liability claims are
* assumed by the product vendor.
*
* This file contains a slow-but-accurate integer implementation of the
* forward DCT (Discrete Cosine Transform).
*
* A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
* on each column. Direct algorithms are also available, but they are
* much more complex and seem not to be any faster when reduced to code.
*
* This implementation is based on an algorithm described in
* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
* Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
* Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
* The primary algorithm described there uses 11 multiplies and 29 adds.
* We use their alternate method with 12 multiplies and 32 adds.
* The advantage of this method is that no data path contains more than one
* multiplication; this allows a very simple and accurate implementation in
* scaled fixed-point arithmetic, with a minimal number of shifts.
*/
/**
* @file
* Independent JPEG Group's slow & accurate dct.
*/
#include "libavutil/common.h"
#include "dct.h"
#include "bit_depth_template.c"
/* Number of samples per row and per column of a block. */
#define DCTSIZE 8
/* Sample precision, taken from the BIT_DEPTH the template is built with. */
#define BITS_IN_JSAMPLE BIT_DEPTH
/* Wraps the return type of externally visible functions; expands to its argument unchanged. */
#define GLOBAL(x) x
/* Plain >> on the operand, with no rounding term added. */
#define RIGHT_SHIFT(x, n) ((x) >> (n))
/* Plain product of the two operands. */
#define MULTIPLY16C16(var,const) ((var)*(const))
/* Right shift by n with rounding: adds half of 2**n before shifting. */
#define DESCALE(x,n) RIGHT_SHIFT((x) + (1 << ((n) - 1)), n)
/*
* This module is specialized to the case DCTSIZE = 8.
*/
#if DCTSIZE != 8
#error "Sorry, this code only copes with 8x8 DCTs."
#endif
/*
* The poop on this scaling stuff is as follows:
*
* Each 1-D DCT step produces outputs which are a factor of sqrt(N)
* larger than the true DCT outputs. The final outputs are therefore
* a factor of N larger than desired; since N=8 this can be cured by
* a simple right shift at the end of the algorithm. The advantage of
* this arrangement is that we save two multiplications per 1-D DCT,
* because the y0 and y4 outputs need not be divided by sqrt(N).
* In the IJG code, this factor of 8 is removed by the quantization step
* (in jcdctmgr.c), NOT in this module.
*
* We have to do addition and subtraction of the integer inputs, which
* is no problem, and multiplication by fractional constants, which is
* a problem to do in integer arithmetic. We multiply all the constants
* by CONST_SCALE and convert them to integer constants (thus retaining
* CONST_BITS bits of precision in the constants). After doing a
* multiplication we have to divide the product by CONST_SCALE, with proper
* rounding, to produce the correct output. This division can be done
* cheaply as a right shift of CONST_BITS bits. We postpone shifting
* as long as possible so that partial sums can be added together with
* full fractional precision.
*
* The outputs of the first pass are scaled up by PASS1_BITS bits so that
* they are represented to better-than-integral precision. These outputs
* require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
* with the recommended scaling. (For 12-bit sample data, the intermediate
* array is int32_t anyway.)
*
* To avoid overflow of the 32-bit intermediate results in pass 2, we must
* have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
* shows that the values given below are the most effective.
*/
/* The template is included more than once, so drop the values left over
 * from a previous inclusion before redefining them. */
#undef CONST_BITS
#undef PASS1_BITS
#undef OUT_SHIFT
/* CONST_BITS: fractional bits of the FIX_* constants.
 * PASS1_BITS: extra bits of precision carried by the pass 1 (row) outputs.
 * OUT_SHIFT:  shift applied by the column passes when descaling; for sample
 *             depths other than 8 it is one more than PASS1_BITS. */
#if BITS_IN_JSAMPLE == 8
#define CONST_BITS 13
#define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */
#define OUT_SHIFT PASS1_BITS
#else
#define CONST_BITS 13
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
#define OUT_SHIFT (PASS1_BITS + 1)
#endif
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
* causing a lot of useless floating-point operations at run time.
* To get around this we use the following pre-calculated constants.
* If you change CONST_BITS you may want to add appropriate values.
* (With a reasonable C compiler, you can just rely on the FIX() macro...)
*/
#if CONST_BITS == 13
#define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */
#define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */
#define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */
#define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */
#define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */
#define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */
#define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */
#define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */
#define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */
#define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */
#define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */
#define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */
#else
#define FIX_0_298631336 FIX(0.298631336)
#define FIX_0_390180644 FIX(0.390180644)
#define FIX_0_541196100 FIX(0.541196100)
#define FIX_0_765366865 FIX(0.765366865)
#define FIX_0_899976223 FIX(0.899976223)
#define FIX_1_175875602 FIX(1.175875602)
#define FIX_1_501321110 FIX(1.501321110)
#define FIX_1_847759065 FIX(1.847759065)
#define FIX_1_961570560 FIX(1.961570560)
#define FIX_2_053119869 FIX(2.053119869)
#define FIX_2_562915447 FIX(2.562915447)
#define FIX_3_072711026 FIX(3.072711026)
#endif
/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
* For 8-bit samples with the recommended scaling, all the variable
* and constant values involved are no more than 16 bits wide, so a
* 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
* For 12-bit samples, a full 32-bit multiplication will be needed.
*/
#if BITS_IN_JSAMPLE == 8 && CONST_BITS<=13 && PASS1_BITS<=2
#define MULTIPLY(var,const) MULTIPLY16C16(var,const)
#else
#define MULTIPLY(var,const) ((var) * (const))
#endif
/*
 * Pass 1 of the accurate forward DCT: transform each of the DCTSIZE rows
 * of the block in place.
 *
 * data points to DCTSIZE*DCTSIZE samples stored row after row. Products
 * keep CONST_BITS fractional bits until they are descaled, and the values
 * written back carry PASS1_BITS extra bits of precision. tmp4..tmp7 and
 * z1..z4 are overwritten with their scaled products in the odd part, so
 * the statement order inside the loop matters.
 */
static av_always_inline void FUNC(row_fdct)(int16_t *data)
{
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int tmp10, tmp11, tmp12, tmp13;
int z1, z2, z3, z4, z5;
int16_t *dataptr;
int ctr;
/* Pass 1: process rows. */
/* Note results are scaled up by sqrt(8) compared to a true DCT; */
/* furthermore, we scale the results by 2**PASS1_BITS. */
dataptr = data;
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
/* First stage: sums and differences of mirrored samples (0/7, 1/6, 2/5, 3/4). */
tmp0 = dataptr[0] + dataptr[7];
tmp7 = dataptr[0] - dataptr[7];
tmp1 = dataptr[1] + dataptr[6];
tmp6 = dataptr[1] - dataptr[6];
tmp2 = dataptr[2] + dataptr[5];
tmp5 = dataptr[2] - dataptr[5];
tmp3 = dataptr[3] + dataptr[4];
tmp4 = dataptr[3] - dataptr[4];
/* Even part per LL&M figure 1 --- note that published figure is faulty;
* rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
*/
tmp10 = tmp0 + tmp3;
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
/* Outputs 0 and 4 need no multiplication; they are only scaled up by 2**PASS1_BITS. */
dataptr[0] = (int16_t) ((tmp10 + tmp11) * (1 << PASS1_BITS));
dataptr[4] = (int16_t) ((tmp10 - tmp11) * (1 << PASS1_BITS));
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
/* Descaling by CONST_BITS-PASS1_BITS leaves PASS1_BITS extra bits in the result. */
dataptr[2] = (int16_t) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS-PASS1_BITS);
dataptr[6] = (int16_t) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS-PASS1_BITS);
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
* cK represents cos(K*pi/16).
* i0..i3 in the paper are tmp4..tmp7 here.
*/
z1 = tmp4 + tmp7;
z2 = tmp5 + tmp6;
z3 = tmp4 + tmp6;
z4 = tmp5 + tmp7;
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
z3 += z5;
z4 += z5;
dataptr[7] = (int16_t) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
dataptr[5] = (int16_t) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
dataptr[3] = (int16_t) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
dataptr[1] = (int16_t) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
dataptr += DCTSIZE; /* advance pointer to next row */
}
}
/*
 * Perform the forward DCT on one block of samples.
 *
 * data holds DCTSIZE*DCTSIZE samples stored row after row and is
 * transformed in place: FUNC(row_fdct) handles the rows, then the same
 * 1-D transform is applied to every column, with the descaling shifts
 * based on OUT_SHIFT.
 */
GLOBAL(void)
FUNC(ff_jpeg_fdct_islow)(int16_t *data)
{
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int tmp10, tmp11, tmp12, tmp13;
int z1, z2, z3, z4, z5;
int16_t *dataptr;
int ctr;
FUNC(row_fdct)(data);
/* Pass 2: process columns.
* We remove the PASS1_BITS scaling, but leave the results scaled up
* by an overall factor of 8.
*/
/* NOTE(review): when OUT_SHIFT is PASS1_BITS + 1 (sample depths other than
 * 8) one additional bit is shifted out here, so the "factor of 8" above
 * seems to hold as written only for the 8-bit build - confirm. */
dataptr = data;
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
/* Column elements are DCTSIZE entries apart. */
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
/* Even part per LL&M figure 1 --- note that published figure is faulty;
* rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
*/
tmp10 = tmp0 + tmp3;
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
/* Products carry CONST_BITS fractional bits, hence the larger shift. */
dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS + OUT_SHIFT);
dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS + OUT_SHIFT);
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
* cK represents cos(K*pi/16).
* i0..i3 in the paper are tmp4..tmp7 here.
*/
z1 = tmp4 + tmp7;
z2 = tmp5 + tmp6;
z3 = tmp4 + tmp6;
z4 = tmp5 + tmp7;
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
z3 += z5;
z4 += z5;
dataptr[DCTSIZE*7] = DESCALE(tmp4 + z1 + z3, CONST_BITS + OUT_SHIFT);
dataptr[DCTSIZE*5] = DESCALE(tmp5 + z2 + z4, CONST_BITS + OUT_SHIFT);
dataptr[DCTSIZE*3] = DESCALE(tmp6 + z2 + z3, CONST_BITS + OUT_SHIFT);
dataptr[DCTSIZE*1] = DESCALE(tmp7 + z1 + z4, CONST_BITS + OUT_SHIFT);
dataptr++; /* advance pointer to next column */
}
}
/*
 * The secret of the 2-4-8 DCT is really simple: you do the usual 1-D DCT
 * on the rows and then, on the columns, instead of doing an even part and
 * an odd part you do the even part two times.
 *
 * data holds DCTSIZE*DCTSIZE samples stored row after row and is
 * transformed in place. The first even-part run works on the sums of
 * adjacent row pairs (results go to rows 0, 4, 2, 6), the second on their
 * differences (results go to rows 1, 5, 3, 7).
 */
GLOBAL(void)
FUNC(ff_fdct248_islow)(int16_t *data)
{
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int tmp10, tmp11, tmp12, tmp13;
int z1;
int16_t *dataptr;
int ctr;
FUNC(row_fdct)(data);
/* Pass 2: process columns.
* We remove the PASS1_BITS scaling, but leave the results scaled up
* by an overall factor of 8.
*/
dataptr = data;
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
/* Sums of adjacent row pairs (0/1, 2/3, 4/5, 6/7)... */
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
/* ...and their differences. */
tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
/* Even part on the sums. */
tmp10 = tmp0 + tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
tmp13 = tmp0 - tmp3;
dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS+OUT_SHIFT);
dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS+OUT_SHIFT);
/* Even part again, this time on the differences. */
tmp10 = tmp4 + tmp7;
tmp11 = tmp5 + tmp6;
tmp12 = tmp5 - tmp6;
tmp13 = tmp4 - tmp7;
dataptr[DCTSIZE*1] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
dataptr[DCTSIZE*5] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
dataptr[DCTSIZE*3] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS + OUT_SHIFT);
dataptr[DCTSIZE*7] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS + OUT_SHIFT);
dataptr++; /* advance pointer to next column */
}
}

File diff suppressed because it is too large Load Diff

View File

@ -17,22 +17,49 @@ if CONFIG['FFVPX_ASFLAGS']:
SharedLibrary('mozavcodec')
SOURCES += [
'allcodecs.c',
'avdct.c',
'avpacket.c',
'bitstream.c',
'bitstream_filters.c',
'bsf.c',
'codec_desc.c',
'dct.c',
'dct32_fixed.c',
'dct32_float.c',
'decode.c',
'faandct.c',
'faanidct.c',
'fdctdsp.c',
'fft_fixed.c',
'fft_fixed_32.c',
'fft_float.c',
'fft_init_table.c',
'flac.c',
'flacdata.c',
'flacdec.c',
'flacdsp.c',
'idctdsp.c',
'jfdctfst.c',
'jfdctint.c',
'jrevdct.c',
'log2_tab.c',
'mpegaudio.c',
'mpegaudiodata.c',
'mpegaudiodec_fixed.c',
'mpegaudiodecheader.c',
'mpegaudiodsp.c',
'mpegaudiodsp_data.c',
'mpegaudiodsp_fixed.c',
'mpegaudiodsp_float.c',
'null_bsf.c',
'options.c',
'parser.c',
'pthread.c',
'pthread_frame.c',
'pthread_slice.c',
'rdft.c',
'reverse.c',
'simple_idct.c',
'utils.c',
'vorbis_parser.c',
'xiph.c'
@ -41,7 +68,6 @@ SOURCES += [
if not CONFIG['MOZ_FFVPX_AUDIOONLY']:
SOURCES += [
'avpicture.c',
'bitstream.c',
'bitstream_filter.c',
'golomb.c',
'h264pred.c',
@ -50,7 +76,6 @@ if not CONFIG['MOZ_FFVPX_AUDIOONLY']:
'profiles.c',
'qsv_api.c',
'raw.c',
'reverse.c',
'videodsp.c',
'vp56rac.c',
'vp8.c',
@ -74,8 +99,6 @@ if not CONFIG['MOZ_FFVPX_AUDIOONLY']:
if CONFIG['MOZ_LIBAV_FFT']:
SOURCES += [
'avfft.c',
'fft_float.c',
'rdft.c',
]
SYMBOLS_FILE = 'avcodec.symbols'

View File

@ -0,0 +1,50 @@
/*
* MPEG Audio common code
* Copyright (c) 2001, 2002 Fabrice Bellard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* MPEG Audio common code.
*/
#include "mpegaudio.h"
/**
 * Pick a table index from the stream parameters.
 *
 * @param bitrate     total bitrate in kb/s; it is divided by nb_channels
 *                    before being compared against the thresholds
 * @param nb_channels number of channels; must not be 0
 * @param freq        sample rate in Hz (compared against 48000 and 32000)
 * @param lsf         non-zero selects table 4 regardless of the other values
 * @return table index in the range 0..4
 */
int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf)
{
    const int per_channel = bitrate / nb_channels;

    if (lsf)
        return 4;

    /* at least 56 kb/s per channel, and either 48 kHz or at most 80 kb/s */
    if (per_channel >= 56 && (freq == 48000 || per_channel <= 80))
        return 0;
    if (freq != 48000 && per_channel >= 96)
        return 1;
    if (freq != 32000 && per_channel <= 48)
        return 2;
    return 3;
}

View File

@ -0,0 +1,81 @@
/*
* copyright (c) 2001 Fabrice Bellard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* mpeg audio declarations for both encoder and decoder.
*/
#ifndef AVCODEC_MPEGAUDIO_H
#define AVCODEC_MPEGAUDIO_H
/* Select the fixed-point configuration unless the including file has already
 * defined USE_FLOATS. */
#ifndef USE_FLOATS
# define USE_FLOATS 0
#endif
#include <stdint.h>
#include "libavutil/internal.h"
/* max frame size, in samples */
#define MPA_FRAME_SIZE 1152
/* max compressed frame size */
#define MPA_MAX_CODED_FRAME_SIZE 1792
#define MPA_MAX_CHANNELS 2
#define SBLIMIT 32 /* number of subbands */
/* Values of the 2-bit channel mode field of the frame header; the header
 * decoder treats MPA_MONO as one channel and every other value as two. */
#define MPA_STEREO 0
#define MPA_JSTEREO 1
#define MPA_DUAL 2
#define MPA_MONO 3
/* Fixed-point precision defaults; skipped when FRAC_BITS was defined before
 * this header is included. */
#ifndef FRAC_BITS
#define FRAC_BITS 23 /* fractional bits for sb_samples and dct */
#define WFRAC_BITS 16 /* fractional bits for window */
#endif
#define IMDCT_SCALAR 1.759
/* The value 1.0 expressed with FRAC_BITS fractional bits. */
#define FRAC_ONE (1 << FRAC_BITS)
/* Convert a real constant to fixed point: scale by FRAC_ONE, then truncate
 * through the int cast. */
#define FIX(a) ((int)((a) * FRAC_ONE))
/* Sample type selection:
 *  - USE_FLOATS:      INTFLOAT, SUINTFLOAT, MPA_INT and OUT_INT are all float
 *  - FRAC_BITS <= 15: int arithmetic, 16-bit MPA_INT and OUT_INT
 *  - otherwise:       int arithmetic, 32-bit MPA_INT, 16-bit OUT_INT
 * SUINT is not defined in this header (presumably provided by
 * libavutil/internal.h - confirm). */
#if USE_FLOATS
# define INTFLOAT float
# define SUINTFLOAT float
typedef float MPA_INT;
typedef float OUT_INT;
#elif FRAC_BITS <= 15
# define INTFLOAT int
# define SUINTFLOAT SUINT
typedef int16_t MPA_INT;
typedef int16_t OUT_INT;
#else
# define INTFLOAT int
# define SUINTFLOAT SUINT
typedef int32_t MPA_INT;
typedef int16_t OUT_INT;
#endif
/* Returns a layer 2 table index in the range 0..4; bitrate is in kb/s and is
 * divided by nb_channels before the decision is made. */
int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
#endif /* AVCODEC_MPEGAUDIO_H */

View File

@ -0,0 +1,91 @@
/*
* Header file for hardcoded mpegaudiodec tables
*
* Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_MPEGAUDIO_TABLEGEN_H
#define AVCODEC_MPEGAUDIO_TABLEGEN_H
#include <stdint.h>
#include <math.h>
#include "libavutil/attributes.h"
/* Number of entries in table_4_3_exp[] and table_4_3_value[].
 * The replacement list is fully parenthesized so the macro acts as a single
 * value inside larger expressions such as x / TABLE_4_3_SIZE or
 * x % TABLE_4_3_SIZE. */
#define TABLE_4_3_SIZE ((8191 + 16)*4)
#if CONFIG_HARDCODED_TABLES
#define mpegaudio_tableinit()
#include "libavcodec/mpegaudio_tables.h"
#else
/* Storage for the tables that mpegaudio_tableinit() fills at run time. */
/* Negated, adjusted binary exponent of each entry; index 0 is never written
 * by mpegaudio_tableinit() and keeps its static zero value. */
static int8_t table_4_3_exp[TABLE_4_3_SIZE];
/* Mantissa of each entry, scaled by 2^31 and rounded; index 0 is never
 * written either. */
static uint32_t table_4_3_value[TABLE_4_3_SIZE];
/* Copy of column 1 of expval_table_fixed for each exponent. */
static uint32_t exp_table_fixed[512];
/* [exponent][value] products rounded to integer and saturated at 0xFFFFFFFF. */
static uint32_t expval_table_fixed[512][16];
/* Copy of column 1 of expval_table_float for each exponent. */
static float exp_table_float[512];
/* [exponent][value] products stored as float. */
static float expval_table_float[512][16];
/* Defined unconditionally here; mpegaudio.h only supplies its own FRAC_BITS
 * default when FRAC_BITS is not already defined. */
#define FRAC_BITS 23
#define IMDCT_SCALAR 1.759
/**
 * Fill the static lookup tables table_4_3_value/table_4_3_exp,
 * expval_table_fixed/expval_table_float and exp_table_fixed/exp_table_float.
 *
 * Takes no arguments and returns nothing; its only effect is writing those
 * file-scope arrays.
 */
static av_cold void mpegaudio_tableinit(void)
{
int i, value, exponent;
/* Quarter-step powers of two: exp2_lut[k] == 2^(k/4). */
static const double exp2_lut[4] = {
1.00000000000000000000, /* 2 ^ (0 * 0.25) */
1.18920711500272106672, /* 2 ^ (1 * 0.25) */
M_SQRT2 , /* 2 ^ (2 * 0.25) */
1.68179283050742908606, /* 2 ^ (3 * 0.25) */
};
/* pow43_lut[i] == i^(4/3), computed below as i * cbrt(i). */
static double pow43_lut[16];
double exp2_base = 2.11758236813575084767080625169910490512847900390625e-22; // 2^(-72)
double exp2_val;
/* Carried across iterations of the first table loop; only refreshed when i is
 * a multiple of 4, so it stays 0 for i = 1..3. */
double pow43_val = 0;
for (i = 0; i < 16; ++i)
pow43_lut[i] = i * cbrt(i);
/* Build table_4_3_value/table_4_3_exp. Index 0 is skipped and is never
 * written by this function. */
for (i = 1; i < TABLE_4_3_SIZE; i++) {
double f, fm;
int e, m;
/* Integer division: the same base value is shared by four consecutive i.
 * NOTE: this local shadows the function-scope int 'value'. */
double value = i / 4;
if ((i & 3) == 0)
pow43_val = value / IMDCT_SCALAR * cbrt(value);
/* The low two bits of i select the quarter-step power of two. */
f = pow43_val * exp2_lut[i & 3];
/* Split f into a mantissa fm and a binary exponent e. */
fm = frexp(f, &e);
m = llrint(fm * (1LL << 31));
e += FRAC_BITS - 31 + 5 - 100;
/* normalized to FRAC_BITS */
table_4_3_value[i] = m;
table_4_3_exp[i] = -e;
}
/* Build the [exponent][value] tables. exp2_base doubles each time exponent
 * reaches a non-zero multiple of 4; the low two bits of exponent pick the
 * quarter step. */
for (exponent = 0; exponent < 512; exponent++) {
if (exponent && (exponent & 3) == 0)
exp2_base *= 2;
exp2_val = exp2_base * exp2_lut[exponent & 3] / IMDCT_SCALAR;
for (value = 0; value < 16; value++) {
double f = pow43_lut[value] * exp2_val;
/* The fixed-point entry saturates at 0xFFFFFFFF instead of overflowing. */
expval_table_fixed[exponent][value] = (f < 0xFFFFFFFF ? llrint(f) : 0xFFFFFFFF);
expval_table_float[exponent][value] = f;
}
/* Column 1 uses pow43_lut[1] == 1, i.e. the bare exp2_val scale. */
exp_table_fixed[exponent] = expval_table_fixed[exponent][1];
exp_table_float[exponent] = expval_table_float[exponent][1];
}
}
#endif /* CONFIG_HARDCODED_TABLES */
#endif /* AVCODEC_MPEGAUDIO_TABLEGEN_H */

View File

@ -0,0 +1,146 @@
/*
* MPEG Audio common tables
* copyright (c) 2002 Fabrice Bellard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* mpeg audio layer common tables.
*/
#include "mpegaudiodata.h"
/* Bitrate table. avpriv_mpegaudio_decode_header() indexes it as
 * [lsf][layer - 1][bitrate_index] and multiplies the entry by 1000 to obtain
 * bit_rate. Index 0 of every row is 0; the header decoder returns 1 for
 * bitrate_index 0 without reading the table. */
const uint16_t avpriv_mpa_bitrate_tab[2][3][15] = {
{ {0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448 },
{0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384 },
{0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320 } },
{ {0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256},
{0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160},
{0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160}
}
};
/* Base sampling frequencies in Hz, indexed by the 2-bit frequency field of
 * the frame header. avpriv_mpegaudio_decode_header() shifts the selected
 * entry right by (lsf + mpeg25) to obtain the actual sample rate. */
const uint16_t avpriv_mpa_freq_tab[3] = { 44100, 48000, 32000 };
/*******************************************************/
/* layer 2 tables */
const int ff_mpa_sblimit_table[5] = { 27 , 30 , 8, 12 , 30 };
const int ff_mpa_quant_steps[17] = {
3, 5, 7, 9, 15,
31, 63, 127, 255, 511,
1023, 2047, 4095, 8191, 16383,
32767, 65535
};
/* we use a negative value if grouped */
const int ff_mpa_quant_bits[17] = {
-5, -7, 3, -10, 4,
5, 6, 7, 8, 9,
10, 11, 12, 13, 14,
15, 16
};
/* encoding tables which give the quantization index. Note how it is
possible to store them efficiently ! */
static const unsigned char alloc_table_1[] = {
4, 0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
4, 0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
4, 0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16,
4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16,
4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16,
4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16,
4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16,
4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16,
4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16,
4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16,
3, 0, 1, 2, 3, 4, 5, 16,
3, 0, 1, 2, 3, 4, 5, 16,
3, 0, 1, 2, 3, 4, 5, 16,
3, 0, 1, 2, 3, 4, 5, 16,
3, 0, 1, 2, 3, 4, 5, 16,
3, 0, 1, 2, 3, 4, 5, 16,
3, 0, 1, 2, 3, 4, 5, 16,
3, 0, 1, 2, 3, 4, 5, 16,
3, 0, 1, 2, 3, 4, 5, 16,
3, 0, 1, 2, 3, 4, 5, 16,
3, 0, 1, 2, 3, 4, 5, 16,
3, 0, 1, 2, 3, 4, 5, 16,
2, 0, 1, 16,
2, 0, 1, 16,
2, 0, 1, 16,
2, 0, 1, 16,
2, 0, 1, 16,
2, 0, 1, 16,
2, 0, 1, 16,
};
static const unsigned char alloc_table_3[] = {
4, 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4, 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3, 0, 1, 3, 4, 5, 6, 7,
3, 0, 1, 3, 4, 5, 6, 7,
3, 0, 1, 3, 4, 5, 6, 7,
3, 0, 1, 3, 4, 5, 6, 7,
3, 0, 1, 3, 4, 5, 6, 7,
3, 0, 1, 3, 4, 5, 6, 7,
3, 0, 1, 3, 4, 5, 6, 7,
3, 0, 1, 3, 4, 5, 6, 7,
3, 0, 1, 3, 4, 5, 6, 7,
3, 0, 1, 3, 4, 5, 6, 7,
};
static const unsigned char alloc_table_4[] = {
4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
3, 0, 1, 3, 4, 5, 6, 7,
3, 0, 1, 3, 4, 5, 6, 7,
3, 0, 1, 3, 4, 5, 6, 7,
3, 0, 1, 3, 4, 5, 6, 7,
3, 0, 1, 3, 4, 5, 6, 7,
3, 0, 1, 3, 4, 5, 6, 7,
3, 0, 1, 3, 4, 5, 6, 7,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
2, 0, 1, 3,
};
/* Pointers to the allocation tables above. Entries 0 and 1 share
 * alloc_table_1, entries 2 and 3 share alloc_table_3, and entry 4 is
 * alloc_table_4. The five entries match the 0..4 range returned by
 * ff_mpa_l2_select_table(); presumably that value is the index - confirm
 * against the callers. */
const unsigned char * const ff_mpa_alloc_tables[5] =
{ alloc_table_1, alloc_table_1, alloc_table_3, alloc_table_3, alloc_table_4, };

View File

@ -0,0 +1,44 @@
/*
* MPEG Audio common tables
* copyright (c) 2002 Fabrice Bellard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* mpeg audio layer common tables.
*/
#ifndef AVCODEC_MPEGAUDIODATA_H
#define AVCODEC_MPEGAUDIODATA_H
#include <stdint.h>
#include "internal.h"
/* Bit flags tested against the mode_ext header field; the header decoder's
 * debug output labels them "ms-" and "i-" for two-channel layer 3 frames. */
#define MODE_EXT_MS_STEREO 2
#define MODE_EXT_I_STEREO 1
/* Bitrate table, indexed [lsf][layer - 1][bitrate_index] by the header
 * decoder. */
extern av_export_avcodec const uint16_t avpriv_mpa_bitrate_tab[2][3][15];
/* Base sampling frequencies in Hz, indexed by the header's 2-bit frequency
 * field. */
extern av_export_avcodec const uint16_t avpriv_mpa_freq_tab[3];
/* Layer 2 tables defined in mpegaudiodata.c. */
extern const int ff_mpa_sblimit_table[5];
extern const int ff_mpa_quant_steps[17];
/* A negative entry marks a grouped quantizer. */
extern const int ff_mpa_quant_bits[17];
/* Five allocation-table pointers; some entries share the same table. */
extern const unsigned char * const ff_mpa_alloc_tables[5];
#endif /* AVCODEC_MPEGAUDIODATA_H */

View File

@ -0,0 +1,120 @@
/*
* Fixed-point MPEG audio decoder
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/samplefmt.h"
#define USE_FLOATS 0
#include "mpegaudio.h"
/* Helper macros configuring mpegaudiodec_template.c for the fixed-point build. */
/* Right shift performed after casting the operand to int. */
#define SHR(a,b) (((int)(a))>>(b))
/* WARNING: only correct for positive numbers */
/* (adding 0.5 and truncating through the int cast rounds to nearest only for
 * non-negative input) */
#define FIXR_OLD(a) ((int)((a) * FRAC_ONE + 0.5))
/* Same expansion as FIXR_OLD in this build. */
#define FIXR(a) ((int)((a) * FRAC_ONE + 0.5))
/* Like FIXR but scaled by 2^32 instead of FRAC_ONE. */
#define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5))
/* MULH and MULL are not defined in this file. */
#define MULH3(x, y, s) MULH((s)*(x), y)
#define MULLx(x, y, s) MULL((int)(x),(y),s)
/* Appends the _fixed suffix to an identifier. */
#define RENAME(a) a ## _fixed
/* Interleaved and planar signed 16-bit output sample formats. */
#define OUT_FMT AV_SAMPLE_FMT_S16
#define OUT_FMT_P AV_SAMPLE_FMT_S16P
#include "mpegaudiodec_template.c"
#if CONFIG_MP1_DECODER
/* Codec descriptor for the fixed-point MP1 decoder. decode_init, decode_frame
 * and flush are not defined in this file (presumably they come from
 * mpegaudiodec_template.c, included above). Advertises planar and interleaved
 * signed 16-bit output; the list is terminated by AV_SAMPLE_FMT_NONE. */
AVCodec ff_mp1_decoder = {
.name = "mp1",
.long_name = NULL_IF_CONFIG_SMALL("MP1 (MPEG audio layer 1)"),
.type = AVMEDIA_TYPE_AUDIO,
.id = AV_CODEC_ID_MP1,
.priv_data_size = sizeof(MPADecodeContext),
.init = decode_init,
.decode = decode_frame,
.capabilities = AV_CODEC_CAP_DR1,
.flush = flush,
.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
AV_SAMPLE_FMT_S16,
AV_SAMPLE_FMT_NONE },
};
#endif
#if CONFIG_MP2_DECODER
/* Codec descriptor for the fixed-point MP2 decoder. Uses the same
 * decode_init/decode_frame/flush callbacks and private context size as the
 * other single-stream descriptors in this file; only the name and codec id
 * differ. Advertises planar and interleaved signed 16-bit output. */
AVCodec ff_mp2_decoder = {
.name = "mp2",
.long_name = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"),
.type = AVMEDIA_TYPE_AUDIO,
.id = AV_CODEC_ID_MP2,
.priv_data_size = sizeof(MPADecodeContext),
.init = decode_init,
.decode = decode_frame,
.capabilities = AV_CODEC_CAP_DR1,
.flush = flush,
.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
AV_SAMPLE_FMT_S16,
AV_SAMPLE_FMT_NONE },
};
#endif
#if CONFIG_MP3_DECODER
/* Codec descriptor for the fixed-point MP3 decoder; compiled only when
 * CONFIG_MP3_DECODER is non-zero. decode_init, decode_frame and flush are not
 * defined in this file (presumably they come from mpegaudiodec_template.c,
 * included above). Advertises planar and interleaved signed 16-bit output. */
AVCodec ff_mp3_decoder = {
.name = "mp3",
.long_name = NULL_IF_CONFIG_SMALL("MP3 (MPEG audio layer 3)"),
.type = AVMEDIA_TYPE_AUDIO,
.id = AV_CODEC_ID_MP3,
.priv_data_size = sizeof(MPADecodeContext),
.init = decode_init,
.decode = decode_frame,
.capabilities = AV_CODEC_CAP_DR1,
.flush = flush,
.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
AV_SAMPLE_FMT_S16,
AV_SAMPLE_FMT_NONE },
};
#endif
#if CONFIG_MP3ADU_DECODER
/* Codec descriptor for the fixed-point MP3 ADU decoder. Differs from the
 * plain MP3 descriptor in its name, codec id and decode callback
 * (decode_frame_adu instead of decode_frame); init and flush are shared. */
AVCodec ff_mp3adu_decoder = {
.name = "mp3adu",
.long_name = NULL_IF_CONFIG_SMALL("ADU (Application Data Unit) MP3 (MPEG audio layer 3)"),
.type = AVMEDIA_TYPE_AUDIO,
.id = AV_CODEC_ID_MP3ADU,
.priv_data_size = sizeof(MPADecodeContext),
.init = decode_init,
.decode = decode_frame_adu,
.capabilities = AV_CODEC_CAP_DR1,
.flush = flush,
.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
AV_SAMPLE_FMT_S16,
AV_SAMPLE_FMT_NONE },
};
#endif
#if CONFIG_MP3ON4_DECODER
/* Codec descriptor for the fixed-point MP3onMP4 decoder. Unlike the other
 * descriptors in this file it has its own private context type
 * (MP3On4DecodeContext), dedicated init/decode/flush callbacks and a close
 * callback, and it advertises only the planar signed 16-bit sample format. */
AVCodec ff_mp3on4_decoder = {
.name = "mp3on4",
.long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"),
.type = AVMEDIA_TYPE_AUDIO,
.id = AV_CODEC_ID_MP3ON4,
.priv_data_size = sizeof(MP3On4DecodeContext),
.init = decode_init_mp3on4,
.close = decode_close_mp3on4,
.decode = decode_frame_mp3on4,
.capabilities = AV_CODEC_CAP_DR1,
.flush = flush_mp3on4,
.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
AV_SAMPLE_FMT_NONE },
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,154 @@
/*
* MPEG Audio header decoder
* Copyright (c) 2001, 2002 Fabrice Bellard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* MPEG Audio header decoder.
*/
#include "libavutil/common.h"
#include "avcodec.h"
#include "internal.h"
#include "mpegaudio.h"
#include "mpegaudiodata.h"
#include "mpegaudiodecheader.h"
/**
 * Decode the fields of a 32-bit MPEG audio frame header into *s.
 *
 * The header is validated with ff_mpa_check_header() first; a negative result
 * from that check is returned unchanged and *s is not written.
 *
 * @param s      destination for the decoded fields
 * @param header raw 32-bit frame header
 * @return 0 on success; 1 when the bitrate index is 0, in which case
 *         s->bit_rate and s->frame_size are not written; otherwise the
 *         negative value returned by ff_mpa_check_header()
 */
int avpriv_mpegaudio_decode_header(MPADecodeHeader *s, uint32_t header)
{
    int freq_idx, rate_idx, pad_bit, is_mpeg25;
    int hz, kbps, bytes;
    int err = ff_mpa_check_header(header);

    if (err < 0)
        return err;

    /* Bit 20 clear: both lsf and mpeg25 are set.
     * Bit 20 set:   mpeg25 is clear and lsf is the inverse of bit 19. */
    is_mpeg25 = !(header & (1<<20));
    s->lsf    = is_mpeg25 || !(header & (1<<19));

    s->layer = 4 - ((header >> 17) & 3);

    /* extract frequency */
    freq_idx = (header >> 10) & 3;
    if (freq_idx >= FF_ARRAY_ELEMS(avpriv_mpa_freq_tab))
        freq_idx = 0;
    hz = avpriv_mpa_freq_tab[freq_idx] >> (s->lsf + is_mpeg25);

    s->sample_rate_index = freq_idx + 3 * (s->lsf + is_mpeg25);
    s->error_protection  = !((header >> 16) & 1);
    s->sample_rate       = hz;

    rate_idx = (header >> 12) & 0xf;
    pad_bit  = (header >> 9) & 1;

    /* Header bits 8 (extension), 3 (copyright), 2 (original) and 0-1
     * (emphasis) are not decoded. */
    s->mode        = (header >> 6) & 3;
    s->mode_ext    = (header >> 4) & 3;
    s->nb_channels = (s->mode == MPA_MONO) ? 1 : 2;

    /* if no frame size computed, signal it */
    if (rate_idx == 0)
        return 1;

    kbps        = avpriv_mpa_bitrate_tab[s->lsf][s->layer - 1][rate_idx];
    s->bit_rate = kbps * 1000;

    if (s->layer == 1)
        bytes = ((kbps * 12000) / hz + pad_bit) * 4;
    else if (s->layer == 2)
        bytes = (kbps * 144000) / hz + pad_bit;
    else
        bytes = (kbps * 144000) / (hz << s->lsf) + pad_bit;
    s->frame_size = bytes;

#if defined(DEBUG)
    ff_dlog(NULL, "layer%d, %d Hz, %d kbits/s, ",
            s->layer, s->sample_rate, s->bit_rate);
    if (s->nb_channels != 2) {
        ff_dlog(NULL, "mono");
    } else {
        if (s->layer == 3) {
            if (s->mode_ext & MODE_EXT_MS_STEREO)
                ff_dlog(NULL, "ms-");
            if (s->mode_ext & MODE_EXT_I_STEREO)
                ff_dlog(NULL, "i-");
        }
        ff_dlog(NULL, "stereo");
    }
    ff_dlog(NULL, "\n");
#endif
    return 0;
}
/**
 * Extract stream parameters from a 32-bit MPEG audio frame header.
 *
 * @param head        raw 32-bit frame header
 * @param sample_rate receives the decoded sample rate
 * @param channels    receives the channel count (1 or 2)
 * @param frame_size  receives 384 for layer 1, 1152 for layer 2, and 576 or
 *                    1152 for layer 3 depending on lsf
 * @param bit_rate    receives the decoded bit rate
 * @param codec_id    in/out: for layer 3 the incoming value is read first and
 *                    AV_CODEC_ID_MP3ADU is left untouched, so the caller must
 *                    initialize it
 * @return the frame size computed by avpriv_mpegaudio_decode_header(), or -1
 *         when that function returns any non-zero value; no output is written
 *         in that case
 */
int ff_mpa_decode_header(uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate, enum AVCodecID *codec_id)
{
    MPADecodeHeader hdr;

    if (avpriv_mpegaudio_decode_header(&hdr, head) != 0)
        return -1;

    if (hdr.layer == 1) {
        *codec_id   = AV_CODEC_ID_MP1;
        *frame_size = 384;
    } else if (hdr.layer == 2) {
        *codec_id   = AV_CODEC_ID_MP2;
        *frame_size = 1152;
    } else {
        /* Layer 3 and any other value: keep an MP3ADU id supplied by the caller. */
        if (*codec_id != AV_CODEC_ID_MP3ADU)
            *codec_id = AV_CODEC_ID_MP3;
        *frame_size = hdr.lsf ? 576 : 1152;
    }

    *sample_rate = hdr.sample_rate;
    *channels    = hdr.nb_channels;
    *bit_rate    = hdr.bit_rate;
    return hdr.frame_size;
}

View File

@ -0,0 +1,77 @@
/*
* MPEG Audio header decoder
* Copyright (c) 2001, 2002 Fabrice Bellard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* MPEG Audio header decoder.
*/
#ifndef AVCODEC_MPEGAUDIODECHEADER_H
#define AVCODEC_MPEGAUDIODECHEADER_H
#include "avcodec.h"
#define MP3_MASK 0xFFFE0CCF
/* Field list for a decoded frame header, written as a macro (presumably so
 * that other context structs can embed the same fields - confirm against the
 * decoder sources). avpriv_mpegaudio_decode_header() fills these fields. */
#define MPA_DECODE_HEADER \
int frame_size; \
int error_protection; \
int layer; \
int sample_rate; \
int sample_rate_index; /* between 0 and 8 */ \
int bit_rate; \
int nb_channels; \
int mode; \
int mode_ext; \
int lsf;
/* Stand-alone decoded header: exactly the MPA_DECODE_HEADER fields. */
typedef struct MPADecodeHeader {
MPA_DECODE_HEADER
} MPADecodeHeader;
/* Header decoding. The implementation calls ff_mpa_check_header() itself and
returns its negative result when the header is rejected. Returns 0 on
success, or 1 if the bitrate index is 0 (free format): in that case
frame_size and bit_rate are not written and the frame size must be computed
externally. */
int avpriv_mpegaudio_decode_header(MPADecodeHeader *s, uint32_t header);
/* Useful helper to get MPEG audio stream info. Returns -1 if
avpriv_mpegaudio_decode_header() reports anything other than success (an
invalid header, or a free-format header), otherwise the coded frame size in
bytes. For layer 3, *codec_id is read before it is written (an incoming
AV_CODEC_ID_MP3ADU is preserved), so the caller must initialize it. */
int ff_mpa_decode_header(uint32_t head, int *sample_rate,
int *channels, int *frame_size, int *bitrate, enum AVCodecID *codec_id);
/* fast header check for resync */
/**
 * Validate the fixed fields of a 32-bit MPEG audio frame header.
 *
 * @param header raw 32-bit frame header
 * @return 0 when every check passes, -1 otherwise
 */
static inline int ff_mpa_check_header(uint32_t header){
    const uint32_t sync_bits    = 0xffe00000; /* all of the top 11 bits must be set */
    const uint32_t layer_bits   = 3<<17;      /* a zero layer field is rejected */
    const uint32_t bitrate_bits = 0xf<<12;    /* an all-ones bitrate field is rejected */
    const uint32_t freq_bits    = 3<<10;      /* an all-ones frequency field is rejected */

    const int bad_sync    = (header & sync_bits) != sync_bits;
    const int bad_layer   = (header & layer_bits) == 0;
    const int bad_bitrate = (header & bitrate_bits) == bitrate_bits;
    const int bad_freq    = (header & freq_bits) == freq_bits;

    return (bad_sync || bad_layer || bad_bitrate || bad_freq) ? -1 : 0;
}
#endif /* AVCODEC_MPEGAUDIODECHEADER_H */

View File

@ -0,0 +1,615 @@
/*
* MPEG Audio decoder
* copyright (c) 2002 Fabrice Bellard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* mpeg audio layer decoder tables.
*/
#ifndef AVCODEC_MPEGAUDIODECTAB_H
#define AVCODEC_MPEGAUDIODECTAB_H
#include <stddef.h>
#include <stdint.h>
#include "mpegaudio.h"
/*******************************************************/
/* layer 3 tables */
/* layer 3 huffman tables */
/* Describes one layer 3 Huffman table as a pair of parallel arrays. */
typedef struct HuffTable {
/* presumably the side length of the square table (the tables in this header
 * hold 4, 9, 16, 36, 64 or 256 entries) - confirm against the decoder */
int xsize;
/* 8-bit per-entry values; matches the element type of the mpa_huffbits_N
 * arrays */
const uint8_t *bits;
/* 16-bit per-entry values; matches the element type of the mpa_huffcodes_N
 * arrays */
const uint16_t *codes;
} HuffTable;
/* layer3 scale factor size */
static const uint8_t slen_table[2][16] = {
{ 0, 0, 0, 0, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
{ 0, 1, 2, 3, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 3 },
};
/* number of lsf scale factors for a given size */
static const uint8_t lsf_nsf_table[6][3][4] = {
{ { 6, 5, 5, 5 }, { 9, 9, 9, 9 }, { 6, 9, 9, 9 } },
{ { 6, 5, 7, 3 }, { 9, 9, 12, 6 }, { 6, 9, 12, 6 } },
{ { 11, 10, 0, 0 }, { 18, 18, 0, 0 }, { 15, 18, 0, 0 } },
{ { 7, 7, 7, 0 }, { 12, 12, 12, 0 }, { 6, 15, 12, 0 } },
{ { 6, 6, 6, 3 }, { 12, 9, 9, 6 }, { 6, 12, 9, 6 } },
{ { 8, 8, 5, 0 }, { 15, 12, 9, 0 }, { 6, 18, 9, 0 } },
};
/* mpegaudio layer 3 huffman tables */
static const uint16_t mpa_huffcodes_1[4] = {
0x0001, 0x0001, 0x0001, 0x0000,
};
static const uint8_t mpa_huffbits_1[4] = {
1, 3, 2, 3,
};
static const uint16_t mpa_huffcodes_2[9] = {
0x0001, 0x0002, 0x0001, 0x0003, 0x0001, 0x0001, 0x0003, 0x0002,
0x0000,
};
static const uint8_t mpa_huffbits_2[9] = {
1, 3, 6, 3, 3, 5, 5, 5,
6,
};
static const uint16_t mpa_huffcodes_3[9] = {
0x0003, 0x0002, 0x0001, 0x0001, 0x0001, 0x0001, 0x0003, 0x0002,
0x0000,
};
static const uint8_t mpa_huffbits_3[9] = {
2, 2, 6, 3, 2, 5, 5, 5,
6,
};
static const uint16_t mpa_huffcodes_5[16] = {
0x0001, 0x0002, 0x0006, 0x0005, 0x0003, 0x0001, 0x0004, 0x0004,
0x0007, 0x0005, 0x0007, 0x0001, 0x0006, 0x0001, 0x0001, 0x0000,
};
static const uint8_t mpa_huffbits_5[16] = {
1, 3, 6, 7, 3, 3, 6, 7,
6, 6, 7, 8, 7, 6, 7, 8,
};
static const uint16_t mpa_huffcodes_6[16] = {
0x0007, 0x0003, 0x0005, 0x0001, 0x0006, 0x0002, 0x0003, 0x0002,
0x0005, 0x0004, 0x0004, 0x0001, 0x0003, 0x0003, 0x0002, 0x0000,
};
static const uint8_t mpa_huffbits_6[16] = {
3, 3, 5, 7, 3, 2, 4, 5,
4, 4, 5, 6, 6, 5, 6, 7,
};
static const uint16_t mpa_huffcodes_7[36] = {
0x0001, 0x0002, 0x000a, 0x0013, 0x0010, 0x000a, 0x0003, 0x0003,
0x0007, 0x000a, 0x0005, 0x0003, 0x000b, 0x0004, 0x000d, 0x0011,
0x0008, 0x0004, 0x000c, 0x000b, 0x0012, 0x000f, 0x000b, 0x0002,
0x0007, 0x0006, 0x0009, 0x000e, 0x0003, 0x0001, 0x0006, 0x0004,
0x0005, 0x0003, 0x0002, 0x0000,
};
static const uint8_t mpa_huffbits_7[36] = {
1, 3, 6, 8, 8, 9, 3, 4,
6, 7, 7, 8, 6, 5, 7, 8,
8, 9, 7, 7, 8, 9, 9, 9,
7, 7, 8, 9, 9, 10, 8, 8,
9, 10, 10, 10,
};
static const uint16_t mpa_huffcodes_8[36] = {
0x0003, 0x0004, 0x0006, 0x0012, 0x000c, 0x0005, 0x0005, 0x0001,
0x0002, 0x0010, 0x0009, 0x0003, 0x0007, 0x0003, 0x0005, 0x000e,
0x0007, 0x0003, 0x0013, 0x0011, 0x000f, 0x000d, 0x000a, 0x0004,
0x000d, 0x0005, 0x0008, 0x000b, 0x0005, 0x0001, 0x000c, 0x0004,
0x0004, 0x0001, 0x0001, 0x0000,
};
static const uint8_t mpa_huffbits_8[36] = {
2, 3, 6, 8, 8, 9, 3, 2,
4, 8, 8, 8, 6, 4, 6, 8,
8, 9, 8, 8, 8, 9, 9, 10,
8, 7, 8, 9, 10, 10, 9, 8,
9, 9, 11, 11,
};
static const uint16_t mpa_huffcodes_9[36] = {
0x0007, 0x0005, 0x0009, 0x000e, 0x000f, 0x0007, 0x0006, 0x0004,
0x0005, 0x0005, 0x0006, 0x0007, 0x0007, 0x0006, 0x0008, 0x0008,
0x0008, 0x0005, 0x000f, 0x0006, 0x0009, 0x000a, 0x0005, 0x0001,
0x000b, 0x0007, 0x0009, 0x0006, 0x0004, 0x0001, 0x000e, 0x0004,
0x0006, 0x0002, 0x0006, 0x0000,
};
static const uint8_t mpa_huffbits_9[36] = {
3, 3, 5, 6, 8, 9, 3, 3,
4, 5, 6, 8, 4, 4, 5, 6,
7, 8, 6, 5, 6, 7, 7, 8,
7, 6, 7, 7, 8, 9, 8, 7,
8, 8, 9, 9,
};
static const uint16_t mpa_huffcodes_10[64] = {
0x0001, 0x0002, 0x000a, 0x0017, 0x0023, 0x001e, 0x000c, 0x0011,
0x0003, 0x0003, 0x0008, 0x000c, 0x0012, 0x0015, 0x000c, 0x0007,
0x000b, 0x0009, 0x000f, 0x0015, 0x0020, 0x0028, 0x0013, 0x0006,
0x000e, 0x000d, 0x0016, 0x0022, 0x002e, 0x0017, 0x0012, 0x0007,
0x0014, 0x0013, 0x0021, 0x002f, 0x001b, 0x0016, 0x0009, 0x0003,
0x001f, 0x0016, 0x0029, 0x001a, 0x0015, 0x0014, 0x0005, 0x0003,
0x000e, 0x000d, 0x000a, 0x000b, 0x0010, 0x0006, 0x0005, 0x0001,
0x0009, 0x0008, 0x0007, 0x0008, 0x0004, 0x0004, 0x0002, 0x0000,
};
static const uint8_t mpa_huffbits_10[64] = {
1, 3, 6, 8, 9, 9, 9, 10,
3, 4, 6, 7, 8, 9, 8, 8,
6, 6, 7, 8, 9, 10, 9, 9,
7, 7, 8, 9, 10, 10, 9, 10,
8, 8, 9, 10, 10, 10, 10, 10,
9, 9, 10, 10, 11, 11, 10, 11,
8, 8, 9, 10, 10, 10, 11, 11,
9, 8, 9, 10, 10, 11, 11, 11,
};
static const uint16_t mpa_huffcodes_11[64] = {
0x0003, 0x0004, 0x000a, 0x0018, 0x0022, 0x0021, 0x0015, 0x000f,
0x0005, 0x0003, 0x0004, 0x000a, 0x0020, 0x0011, 0x000b, 0x000a,
0x000b, 0x0007, 0x000d, 0x0012, 0x001e, 0x001f, 0x0014, 0x0005,
0x0019, 0x000b, 0x0013, 0x003b, 0x001b, 0x0012, 0x000c, 0x0005,
0x0023, 0x0021, 0x001f, 0x003a, 0x001e, 0x0010, 0x0007, 0x0005,
0x001c, 0x001a, 0x0020, 0x0013, 0x0011, 0x000f, 0x0008, 0x000e,
0x000e, 0x000c, 0x0009, 0x000d, 0x000e, 0x0009, 0x0004, 0x0001,
0x000b, 0x0004, 0x0006, 0x0006, 0x0006, 0x0003, 0x0002, 0x0000,
};
static const uint8_t mpa_huffbits_11[64] = {
2, 3, 5, 7, 8, 9, 8, 9,
3, 3, 4, 6, 8, 8, 7, 8,
5, 5, 6, 7, 8, 9, 8, 8,
7, 6, 7, 9, 8, 10, 8, 9,
8, 8, 8, 9, 9, 10, 9, 10,
8, 8, 9, 10, 10, 11, 10, 11,
8, 7, 7, 8, 9, 10, 10, 10,
8, 7, 8, 9, 10, 10, 10, 10,
};
static const uint16_t mpa_huffcodes_12[64] = {
0x0009, 0x0006, 0x0010, 0x0021, 0x0029, 0x0027, 0x0026, 0x001a,
0x0007, 0x0005, 0x0006, 0x0009, 0x0017, 0x0010, 0x001a, 0x000b,
0x0011, 0x0007, 0x000b, 0x000e, 0x0015, 0x001e, 0x000a, 0x0007,
0x0011, 0x000a, 0x000f, 0x000c, 0x0012, 0x001c, 0x000e, 0x0005,
0x0020, 0x000d, 0x0016, 0x0013, 0x0012, 0x0010, 0x0009, 0x0005,
0x0028, 0x0011, 0x001f, 0x001d, 0x0011, 0x000d, 0x0004, 0x0002,
0x001b, 0x000c, 0x000b, 0x000f, 0x000a, 0x0007, 0x0004, 0x0001,
0x001b, 0x000c, 0x0008, 0x000c, 0x0006, 0x0003, 0x0001, 0x0000,
};
static const uint8_t mpa_huffbits_12[64] = {
4, 3, 5, 7, 8, 9, 9, 9,
3, 3, 4, 5, 7, 7, 8, 8,
5, 4, 5, 6, 7, 8, 7, 8,
6, 5, 6, 6, 7, 8, 8, 8,
7, 6, 7, 7, 8, 8, 8, 9,
8, 7, 8, 8, 8, 9, 8, 9,
8, 7, 7, 8, 8, 9, 9, 10,
9, 8, 8, 9, 9, 9, 9, 10,
};
static const uint16_t mpa_huffcodes_13[256] = {
0x0001, 0x0005, 0x000e, 0x0015, 0x0022, 0x0033, 0x002e, 0x0047,
0x002a, 0x0034, 0x0044, 0x0034, 0x0043, 0x002c, 0x002b, 0x0013,
0x0003, 0x0004, 0x000c, 0x0013, 0x001f, 0x001a, 0x002c, 0x0021,
0x001f, 0x0018, 0x0020, 0x0018, 0x001f, 0x0023, 0x0016, 0x000e,
0x000f, 0x000d, 0x0017, 0x0024, 0x003b, 0x0031, 0x004d, 0x0041,
0x001d, 0x0028, 0x001e, 0x0028, 0x001b, 0x0021, 0x002a, 0x0010,
0x0016, 0x0014, 0x0025, 0x003d, 0x0038, 0x004f, 0x0049, 0x0040,
0x002b, 0x004c, 0x0038, 0x0025, 0x001a, 0x001f, 0x0019, 0x000e,
0x0023, 0x0010, 0x003c, 0x0039, 0x0061, 0x004b, 0x0072, 0x005b,
0x0036, 0x0049, 0x0037, 0x0029, 0x0030, 0x0035, 0x0017, 0x0018,
0x003a, 0x001b, 0x0032, 0x0060, 0x004c, 0x0046, 0x005d, 0x0054,
0x004d, 0x003a, 0x004f, 0x001d, 0x004a, 0x0031, 0x0029, 0x0011,
0x002f, 0x002d, 0x004e, 0x004a, 0x0073, 0x005e, 0x005a, 0x004f,
0x0045, 0x0053, 0x0047, 0x0032, 0x003b, 0x0026, 0x0024, 0x000f,
0x0048, 0x0022, 0x0038, 0x005f, 0x005c, 0x0055, 0x005b, 0x005a,
0x0056, 0x0049, 0x004d, 0x0041, 0x0033, 0x002c, 0x002b, 0x002a,
0x002b, 0x0014, 0x001e, 0x002c, 0x0037, 0x004e, 0x0048, 0x0057,
0x004e, 0x003d, 0x002e, 0x0036, 0x0025, 0x001e, 0x0014, 0x0010,
0x0035, 0x0019, 0x0029, 0x0025, 0x002c, 0x003b, 0x0036, 0x0051,
0x0042, 0x004c, 0x0039, 0x0036, 0x0025, 0x0012, 0x0027, 0x000b,
0x0023, 0x0021, 0x001f, 0x0039, 0x002a, 0x0052, 0x0048, 0x0050,
0x002f, 0x003a, 0x0037, 0x0015, 0x0016, 0x001a, 0x0026, 0x0016,
0x0035, 0x0019, 0x0017, 0x0026, 0x0046, 0x003c, 0x0033, 0x0024,
0x0037, 0x001a, 0x0022, 0x0017, 0x001b, 0x000e, 0x0009, 0x0007,
0x0022, 0x0020, 0x001c, 0x0027, 0x0031, 0x004b, 0x001e, 0x0034,
0x0030, 0x0028, 0x0034, 0x001c, 0x0012, 0x0011, 0x0009, 0x0005,
0x002d, 0x0015, 0x0022, 0x0040, 0x0038, 0x0032, 0x0031, 0x002d,
0x001f, 0x0013, 0x000c, 0x000f, 0x000a, 0x0007, 0x0006, 0x0003,
0x0030, 0x0017, 0x0014, 0x0027, 0x0024, 0x0023, 0x0035, 0x0015,
0x0010, 0x0017, 0x000d, 0x000a, 0x0006, 0x0001, 0x0004, 0x0002,
0x0010, 0x000f, 0x0011, 0x001b, 0x0019, 0x0014, 0x001d, 0x000b,
0x0011, 0x000c, 0x0010, 0x0008, 0x0001, 0x0001, 0x0000, 0x0001,
};
static const uint8_t mpa_huffbits_13[256] = {
1, 4, 6, 7, 8, 9, 9, 10,
9, 10, 11, 11, 12, 12, 13, 13,
3, 4, 6, 7, 8, 8, 9, 9,
9, 9, 10, 10, 11, 12, 12, 12,
6, 6, 7, 8, 9, 9, 10, 10,
9, 10, 10, 11, 11, 12, 13, 13,
7, 7, 8, 9, 9, 10, 10, 10,
10, 11, 11, 11, 11, 12, 13, 13,
8, 7, 9, 9, 10, 10, 11, 11,
10, 11, 11, 12, 12, 13, 13, 14,
9, 8, 9, 10, 10, 10, 11, 11,
11, 11, 12, 11, 13, 13, 14, 14,
9, 9, 10, 10, 11, 11, 11, 11,
11, 12, 12, 12, 13, 13, 14, 14,
10, 9, 10, 11, 11, 11, 12, 12,
12, 12, 13, 13, 13, 14, 16, 16,
9, 8, 9, 10, 10, 11, 11, 12,
12, 12, 12, 13, 13, 14, 15, 15,
10, 9, 10, 10, 11, 11, 11, 13,
12, 13, 13, 14, 14, 14, 16, 15,
10, 10, 10, 11, 11, 12, 12, 13,
12, 13, 14, 13, 14, 15, 16, 17,
11, 10, 10, 11, 12, 12, 12, 12,
13, 13, 13, 14, 15, 15, 15, 16,
11, 11, 11, 12, 12, 13, 12, 13,
14, 14, 15, 15, 15, 16, 16, 16,
12, 11, 12, 13, 13, 13, 14, 14,
14, 14, 14, 15, 16, 15, 16, 16,
13, 12, 12, 13, 13, 13, 15, 14,
14, 17, 15, 15, 15, 17, 16, 16,
12, 12, 13, 14, 14, 14, 15, 14,
15, 15, 16, 16, 19, 18, 19, 16,
};
static const uint16_t mpa_huffcodes_15[256] = {
0x0007, 0x000c, 0x0012, 0x0035, 0x002f, 0x004c, 0x007c, 0x006c,
0x0059, 0x007b, 0x006c, 0x0077, 0x006b, 0x0051, 0x007a, 0x003f,
0x000d, 0x0005, 0x0010, 0x001b, 0x002e, 0x0024, 0x003d, 0x0033,
0x002a, 0x0046, 0x0034, 0x0053, 0x0041, 0x0029, 0x003b, 0x0024,
0x0013, 0x0011, 0x000f, 0x0018, 0x0029, 0x0022, 0x003b, 0x0030,
0x0028, 0x0040, 0x0032, 0x004e, 0x003e, 0x0050, 0x0038, 0x0021,
0x001d, 0x001c, 0x0019, 0x002b, 0x0027, 0x003f, 0x0037, 0x005d,
0x004c, 0x003b, 0x005d, 0x0048, 0x0036, 0x004b, 0x0032, 0x001d,
0x0034, 0x0016, 0x002a, 0x0028, 0x0043, 0x0039, 0x005f, 0x004f,
0x0048, 0x0039, 0x0059, 0x0045, 0x0031, 0x0042, 0x002e, 0x001b,
0x004d, 0x0025, 0x0023, 0x0042, 0x003a, 0x0034, 0x005b, 0x004a,
0x003e, 0x0030, 0x004f, 0x003f, 0x005a, 0x003e, 0x0028, 0x0026,
0x007d, 0x0020, 0x003c, 0x0038, 0x0032, 0x005c, 0x004e, 0x0041,
0x0037, 0x0057, 0x0047, 0x0033, 0x0049, 0x0033, 0x0046, 0x001e,
0x006d, 0x0035, 0x0031, 0x005e, 0x0058, 0x004b, 0x0042, 0x007a,
0x005b, 0x0049, 0x0038, 0x002a, 0x0040, 0x002c, 0x0015, 0x0019,
0x005a, 0x002b, 0x0029, 0x004d, 0x0049, 0x003f, 0x0038, 0x005c,
0x004d, 0x0042, 0x002f, 0x0043, 0x0030, 0x0035, 0x0024, 0x0014,
0x0047, 0x0022, 0x0043, 0x003c, 0x003a, 0x0031, 0x0058, 0x004c,
0x0043, 0x006a, 0x0047, 0x0036, 0x0026, 0x0027, 0x0017, 0x000f,
0x006d, 0x0035, 0x0033, 0x002f, 0x005a, 0x0052, 0x003a, 0x0039,
0x0030, 0x0048, 0x0039, 0x0029, 0x0017, 0x001b, 0x003e, 0x0009,
0x0056, 0x002a, 0x0028, 0x0025, 0x0046, 0x0040, 0x0034, 0x002b,
0x0046, 0x0037, 0x002a, 0x0019, 0x001d, 0x0012, 0x000b, 0x000b,
0x0076, 0x0044, 0x001e, 0x0037, 0x0032, 0x002e, 0x004a, 0x0041,
0x0031, 0x0027, 0x0018, 0x0010, 0x0016, 0x000d, 0x000e, 0x0007,
0x005b, 0x002c, 0x0027, 0x0026, 0x0022, 0x003f, 0x0034, 0x002d,
0x001f, 0x0034, 0x001c, 0x0013, 0x000e, 0x0008, 0x0009, 0x0003,
0x007b, 0x003c, 0x003a, 0x0035, 0x002f, 0x002b, 0x0020, 0x0016,
0x0025, 0x0018, 0x0011, 0x000c, 0x000f, 0x000a, 0x0002, 0x0001,
0x0047, 0x0025, 0x0022, 0x001e, 0x001c, 0x0014, 0x0011, 0x001a,
0x0015, 0x0010, 0x000a, 0x0006, 0x0008, 0x0006, 0x0002, 0x0000,
};
/* Companion of mpa_huffcodes_15: the two arrays are paired in
 * mpa_huff_tables. Presumably entry k holds the length in bits of code k,
 * and the 256 entries form a 16x16 grid (the paired descriptor's first
 * field is 16) -- TODO confirm at the VLC construction site. */
static const uint8_t mpa_huffbits_15[256] = {
3, 4, 5, 7, 7, 8, 9, 9,
9, 10, 10, 11, 11, 11, 12, 13,
4, 3, 5, 6, 7, 7, 8, 8,
8, 9, 9, 10, 10, 10, 11, 11,
5, 5, 5, 6, 7, 7, 8, 8,
8, 9, 9, 10, 10, 11, 11, 11,
6, 6, 6, 7, 7, 8, 8, 9,
9, 9, 10, 10, 10, 11, 11, 11,
7, 6, 7, 7, 8, 8, 9, 9,
9, 9, 10, 10, 10, 11, 11, 11,
8, 7, 7, 8, 8, 8, 9, 9,
9, 9, 10, 10, 11, 11, 11, 12,
9, 7, 8, 8, 8, 9, 9, 9,
9, 10, 10, 10, 11, 11, 12, 12,
9, 8, 8, 9, 9, 9, 9, 10,
10, 10, 10, 10, 11, 11, 11, 12,
9, 8, 8, 9, 9, 9, 9, 10,
10, 10, 10, 11, 11, 12, 12, 12,
9, 8, 9, 9, 9, 9, 10, 10,
10, 11, 11, 11, 11, 12, 12, 12,
10, 9, 9, 9, 10, 10, 10, 10,
10, 11, 11, 11, 11, 12, 13, 12,
10, 9, 9, 9, 10, 10, 10, 10,
11, 11, 11, 11, 12, 12, 12, 13,
11, 10, 9, 10, 10, 10, 11, 11,
11, 11, 11, 11, 12, 12, 13, 13,
11, 10, 10, 10, 10, 11, 11, 11,
11, 12, 12, 12, 12, 12, 13, 13,
12, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 13, 13, 12, 13,
12, 11, 11, 11, 11, 11, 11, 12,
12, 12, 12, 12, 13, 13, 13, 13,
};
/* Code words referenced by entry 14 of mpa_huff_tables, together with
 * mpa_huffbits_16. Presumably entry k is the bit pattern whose length is
 * mpa_huffbits_16[k], laid out as a 16x16 grid -- TODO confirm at the VLC
 * construction site. */
static const uint16_t mpa_huffcodes_16[256] = {
0x0001, 0x0005, 0x000e, 0x002c, 0x004a, 0x003f, 0x006e, 0x005d,
0x00ac, 0x0095, 0x008a, 0x00f2, 0x00e1, 0x00c3, 0x0178, 0x0011,
0x0003, 0x0004, 0x000c, 0x0014, 0x0023, 0x003e, 0x0035, 0x002f,
0x0053, 0x004b, 0x0044, 0x0077, 0x00c9, 0x006b, 0x00cf, 0x0009,
0x000f, 0x000d, 0x0017, 0x0026, 0x0043, 0x003a, 0x0067, 0x005a,
0x00a1, 0x0048, 0x007f, 0x0075, 0x006e, 0x00d1, 0x00ce, 0x0010,
0x002d, 0x0015, 0x0027, 0x0045, 0x0040, 0x0072, 0x0063, 0x0057,
0x009e, 0x008c, 0x00fc, 0x00d4, 0x00c7, 0x0183, 0x016d, 0x001a,
0x004b, 0x0024, 0x0044, 0x0041, 0x0073, 0x0065, 0x00b3, 0x00a4,
0x009b, 0x0108, 0x00f6, 0x00e2, 0x018b, 0x017e, 0x016a, 0x0009,
0x0042, 0x001e, 0x003b, 0x0038, 0x0066, 0x00b9, 0x00ad, 0x0109,
0x008e, 0x00fd, 0x00e8, 0x0190, 0x0184, 0x017a, 0x01bd, 0x0010,
0x006f, 0x0036, 0x0034, 0x0064, 0x00b8, 0x00b2, 0x00a0, 0x0085,
0x0101, 0x00f4, 0x00e4, 0x00d9, 0x0181, 0x016e, 0x02cb, 0x000a,
0x0062, 0x0030, 0x005b, 0x0058, 0x00a5, 0x009d, 0x0094, 0x0105,
0x00f8, 0x0197, 0x018d, 0x0174, 0x017c, 0x0379, 0x0374, 0x0008,
0x0055, 0x0054, 0x0051, 0x009f, 0x009c, 0x008f, 0x0104, 0x00f9,
0x01ab, 0x0191, 0x0188, 0x017f, 0x02d7, 0x02c9, 0x02c4, 0x0007,
0x009a, 0x004c, 0x0049, 0x008d, 0x0083, 0x0100, 0x00f5, 0x01aa,
0x0196, 0x018a, 0x0180, 0x02df, 0x0167, 0x02c6, 0x0160, 0x000b,
0x008b, 0x0081, 0x0043, 0x007d, 0x00f7, 0x00e9, 0x00e5, 0x00db,
0x0189, 0x02e7, 0x02e1, 0x02d0, 0x0375, 0x0372, 0x01b7, 0x0004,
0x00f3, 0x0078, 0x0076, 0x0073, 0x00e3, 0x00df, 0x018c, 0x02ea,
0x02e6, 0x02e0, 0x02d1, 0x02c8, 0x02c2, 0x00df, 0x01b4, 0x0006,
0x00ca, 0x00e0, 0x00de, 0x00da, 0x00d8, 0x0185, 0x0182, 0x017d,
0x016c, 0x0378, 0x01bb, 0x02c3, 0x01b8, 0x01b5, 0x06c0, 0x0004,
0x02eb, 0x00d3, 0x00d2, 0x00d0, 0x0172, 0x017b, 0x02de, 0x02d3,
0x02ca, 0x06c7, 0x0373, 0x036d, 0x036c, 0x0d83, 0x0361, 0x0002,
0x0179, 0x0171, 0x0066, 0x00bb, 0x02d6, 0x02d2, 0x0166, 0x02c7,
0x02c5, 0x0362, 0x06c6, 0x0367, 0x0d82, 0x0366, 0x01b2, 0x0000,
0x000c, 0x000a, 0x0007, 0x000b, 0x000a, 0x0011, 0x000b, 0x0009,
0x000d, 0x000c, 0x000a, 0x0007, 0x0005, 0x0003, 0x0001, 0x0003,
};
/* Companion of mpa_huffcodes_16: the two arrays are paired in entry 14 of
 * mpa_huff_tables. Presumably entry k holds the length in bits of code k
 * -- TODO confirm at the VLC construction site. */
static const uint8_t mpa_huffbits_16[256] = {
1, 4, 6, 8, 9, 9, 10, 10,
11, 11, 11, 12, 12, 12, 13, 9,
3, 4, 6, 7, 8, 9, 9, 9,
10, 10, 10, 11, 12, 11, 12, 8,
6, 6, 7, 8, 9, 9, 10, 10,
11, 10, 11, 11, 11, 12, 12, 9,
8, 7, 8, 9, 9, 10, 10, 10,
11, 11, 12, 12, 12, 13, 13, 10,
9, 8, 9, 9, 10, 10, 11, 11,
11, 12, 12, 12, 13, 13, 13, 9,
9, 8, 9, 9, 10, 11, 11, 12,
11, 12, 12, 13, 13, 13, 14, 10,
10, 9, 9, 10, 11, 11, 11, 11,
12, 12, 12, 12, 13, 13, 14, 10,
10, 9, 10, 10, 11, 11, 11, 12,
12, 13, 13, 13, 13, 15, 15, 10,
10, 10, 10, 11, 11, 11, 12, 12,
13, 13, 13, 13, 14, 14, 14, 10,
11, 10, 10, 11, 11, 12, 12, 13,
13, 13, 13, 14, 13, 14, 13, 11,
11, 11, 10, 11, 12, 12, 12, 12,
13, 14, 14, 14, 15, 15, 14, 10,
12, 11, 11, 11, 12, 12, 13, 14,
14, 14, 14, 14, 14, 13, 14, 11,
12, 12, 12, 12, 12, 13, 13, 13,
13, 15, 14, 14, 14, 14, 16, 11,
14, 12, 12, 12, 13, 13, 14, 14,
14, 16, 15, 15, 15, 17, 15, 11,
13, 13, 11, 12, 14, 14, 13, 14,
14, 15, 16, 15, 17, 15, 14, 11,
9, 8, 8, 9, 9, 10, 10, 10,
11, 11, 11, 11, 11, 11, 11, 8,
};
/* Code words referenced by entry 15 of mpa_huff_tables, together with
 * mpa_huffbits_24. Presumably entry k is the bit pattern whose length is
 * mpa_huffbits_24[k], laid out as a 16x16 grid -- TODO confirm at the VLC
 * construction site. */
static const uint16_t mpa_huffcodes_24[256] = {
0x000f, 0x000d, 0x002e, 0x0050, 0x0092, 0x0106, 0x00f8, 0x01b2,
0x01aa, 0x029d, 0x028d, 0x0289, 0x026d, 0x0205, 0x0408, 0x0058,
0x000e, 0x000c, 0x0015, 0x0026, 0x0047, 0x0082, 0x007a, 0x00d8,
0x00d1, 0x00c6, 0x0147, 0x0159, 0x013f, 0x0129, 0x0117, 0x002a,
0x002f, 0x0016, 0x0029, 0x004a, 0x0044, 0x0080, 0x0078, 0x00dd,
0x00cf, 0x00c2, 0x00b6, 0x0154, 0x013b, 0x0127, 0x021d, 0x0012,
0x0051, 0x0027, 0x004b, 0x0046, 0x0086, 0x007d, 0x0074, 0x00dc,
0x00cc, 0x00be, 0x00b2, 0x0145, 0x0137, 0x0125, 0x010f, 0x0010,
0x0093, 0x0048, 0x0045, 0x0087, 0x007f, 0x0076, 0x0070, 0x00d2,
0x00c8, 0x00bc, 0x0160, 0x0143, 0x0132, 0x011d, 0x021c, 0x000e,
0x0107, 0x0042, 0x0081, 0x007e, 0x0077, 0x0072, 0x00d6, 0x00ca,
0x00c0, 0x00b4, 0x0155, 0x013d, 0x012d, 0x0119, 0x0106, 0x000c,
0x00f9, 0x007b, 0x0079, 0x0075, 0x0071, 0x00d7, 0x00ce, 0x00c3,
0x00b9, 0x015b, 0x014a, 0x0134, 0x0123, 0x0110, 0x0208, 0x000a,
0x01b3, 0x0073, 0x006f, 0x006d, 0x00d3, 0x00cb, 0x00c4, 0x00bb,
0x0161, 0x014c, 0x0139, 0x012a, 0x011b, 0x0213, 0x017d, 0x0011,
0x01ab, 0x00d4, 0x00d0, 0x00cd, 0x00c9, 0x00c1, 0x00ba, 0x00b1,
0x00a9, 0x0140, 0x012f, 0x011e, 0x010c, 0x0202, 0x0179, 0x0010,
0x014f, 0x00c7, 0x00c5, 0x00bf, 0x00bd, 0x00b5, 0x00ae, 0x014d,
0x0141, 0x0131, 0x0121, 0x0113, 0x0209, 0x017b, 0x0173, 0x000b,
0x029c, 0x00b8, 0x00b7, 0x00b3, 0x00af, 0x0158, 0x014b, 0x013a,
0x0130, 0x0122, 0x0115, 0x0212, 0x017f, 0x0175, 0x016e, 0x000a,
0x028c, 0x015a, 0x00ab, 0x00a8, 0x00a4, 0x013e, 0x0135, 0x012b,
0x011f, 0x0114, 0x0107, 0x0201, 0x0177, 0x0170, 0x016a, 0x0006,
0x0288, 0x0142, 0x013c, 0x0138, 0x0133, 0x012e, 0x0124, 0x011c,
0x010d, 0x0105, 0x0200, 0x0178, 0x0172, 0x016c, 0x0167, 0x0004,
0x026c, 0x012c, 0x0128, 0x0126, 0x0120, 0x011a, 0x0111, 0x010a,
0x0203, 0x017c, 0x0176, 0x0171, 0x016d, 0x0169, 0x0165, 0x0002,
0x0409, 0x0118, 0x0116, 0x0112, 0x010b, 0x0108, 0x0103, 0x017e,
0x017a, 0x0174, 0x016f, 0x016b, 0x0168, 0x0166, 0x0164, 0x0000,
0x002b, 0x0014, 0x0013, 0x0011, 0x000f, 0x000d, 0x000b, 0x0009,
0x0007, 0x0006, 0x0004, 0x0007, 0x0005, 0x0003, 0x0001, 0x0003,
};
/* Companion of mpa_huffcodes_24: the two arrays are paired in entry 15 of
 * mpa_huff_tables. Presumably entry k holds the length in bits of code k
 * -- TODO confirm at the VLC construction site. */
static const uint8_t mpa_huffbits_24[256] = {
4, 4, 6, 7, 8, 9, 9, 10,
10, 11, 11, 11, 11, 11, 12, 9,
4, 4, 5, 6, 7, 8, 8, 9,
9, 9, 10, 10, 10, 10, 10, 8,
6, 5, 6, 7, 7, 8, 8, 9,
9, 9, 9, 10, 10, 10, 11, 7,
7, 6, 7, 7, 8, 8, 8, 9,
9, 9, 9, 10, 10, 10, 10, 7,
8, 7, 7, 8, 8, 8, 8, 9,
9, 9, 10, 10, 10, 10, 11, 7,
9, 7, 8, 8, 8, 8, 9, 9,
9, 9, 10, 10, 10, 10, 10, 7,
9, 8, 8, 8, 8, 9, 9, 9,
9, 10, 10, 10, 10, 10, 11, 7,
10, 8, 8, 8, 9, 9, 9, 9,
10, 10, 10, 10, 10, 11, 11, 8,
10, 9, 9, 9, 9, 9, 9, 9,
9, 10, 10, 10, 10, 11, 11, 8,
10, 9, 9, 9, 9, 9, 9, 10,
10, 10, 10, 10, 11, 11, 11, 8,
11, 9, 9, 9, 9, 10, 10, 10,
10, 10, 10, 11, 11, 11, 11, 8,
11, 10, 9, 9, 9, 10, 10, 10,
10, 10, 10, 11, 11, 11, 11, 8,
11, 10, 10, 10, 10, 10, 10, 10,
10, 10, 11, 11, 11, 11, 11, 8,
11, 10, 10, 10, 10, 10, 10, 10,
11, 11, 11, 11, 11, 11, 11, 8,
12, 10, 10, 10, 10, 10, 10, 11,
11, 11, 11, 11, 11, 11, 11, 8,
8, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 8, 8, 8, 8, 4,
};
/* Descriptors for the 16 distinct Huffman tables. Each entry is
 * { size, bit-length array, code array }. The entries whose first field is
 * 16 reference 256-element arrays, so the first field is presumably the
 * side length of a square table -- TODO confirm against the HuffTable
 * declaration. Entry 0 carries no arrays. The numeric suffix of the array
 * names (1, 2, 3, 5, ... 24) is not the index into this array. */
static const HuffTable mpa_huff_tables[16] = {
{ 1, NULL, NULL },
{ 2, mpa_huffbits_1, mpa_huffcodes_1 },
{ 3, mpa_huffbits_2, mpa_huffcodes_2 },
{ 3, mpa_huffbits_3, mpa_huffcodes_3 },
{ 4, mpa_huffbits_5, mpa_huffcodes_5 },
{ 4, mpa_huffbits_6, mpa_huffcodes_6 },
{ 6, mpa_huffbits_7, mpa_huffcodes_7 },
{ 6, mpa_huffbits_8, mpa_huffcodes_8 },
{ 6, mpa_huffbits_9, mpa_huffcodes_9 },
{ 8, mpa_huffbits_10, mpa_huffcodes_10 },
{ 8, mpa_huffbits_11, mpa_huffcodes_11 },
{ 8, mpa_huffbits_12, mpa_huffcodes_12 },
{ 16, mpa_huffbits_13, mpa_huffcodes_13 },
{ 16, mpa_huffbits_15, mpa_huffcodes_15 },
{ 16, mpa_huffbits_16, mpa_huffcodes_16 },
{ 16, mpa_huffbits_24, mpa_huffcodes_24 },
};
/* 32 rows of two bytes each. The first column only takes values 0..15 and
 * is presumably an index into mpa_huff_tables; the second column is
 * presumably the number of extra ("linbits") bits read for escaped values
 * -- TODO confirm both at the use site. Rows 4 and 14 are { 0, 0 }, the
 * same as row 0. Rows 16..23 share table 14 and rows 24..31 share
 * table 15, differing only in the second column. */
static const uint8_t mpa_huff_data[32][2] = {
{ 0, 0 },
{ 1, 0 },
{ 2, 0 },
{ 3, 0 },
{ 0, 0 },
{ 4, 0 },
{ 5, 0 },
{ 6, 0 },
{ 7, 0 },
{ 8, 0 },
{ 9, 0 },
{ 10, 0 },
{ 11, 0 },
{ 12, 0 },
{ 0, 0 },
{ 13, 0 },
{ 14, 1 },
{ 14, 2 },
{ 14, 3 },
{ 14, 4 },
{ 14, 6 },
{ 14, 8 },
{ 14, 10 },
{ 14, 13 },
{ 15, 4 },
{ 15, 5 },
{ 15, 6 },
{ 15, 7 },
{ 15, 8 },
{ 15, 9 },
{ 15, 11 },
{ 15, 13 },
};
/* huffman tables for quadrules */
/* Code words for the two 16-entry quad tables; the matching code lengths
 * are in mpa_quad_bits. Row 1 is simply 15 - index, paired with a fixed
 * length of 4 bits in mpa_quad_bits. */
static const uint8_t mpa_quad_codes[2][16] = {
{ 1, 5, 4, 5, 6, 5, 4, 4, 7, 3, 6, 0, 7, 2, 3, 1, },
{ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, },
};
/* Code lengths matching mpa_quad_codes entry for entry (presumably in
 * bits -- TODO confirm at the use site). Row 0 is variable length (1..6),
 * row 1 is a fixed 4 for every entry. */
static const uint8_t mpa_quad_bits[2][16] = {
{ 1, 4, 4, 5, 4, 6, 5, 6, 4, 5, 5, 6, 5, 6, 6, 6, },
{ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, },
};
/* band size tables */
/* Widths of the 22 bands used for long blocks, one row per sample rate
 * (the rate is given in the trailing comment of each row). Every row sums
 * to 576. The 22050, 16000, 11025 and 12000 rows are identical. */
static const uint8_t band_size_long[9][22] = {
{ 4, 4, 4, 4, 4, 4, 6, 6, 8, 8, 10,
12, 16, 20, 24, 28, 34, 42, 50, 54, 76, 158, }, /* 44100 */
{ 4, 4, 4, 4, 4, 4, 6, 6, 6, 8, 10,
12, 16, 18, 22, 28, 34, 40, 46, 54, 54, 192, }, /* 48000 */
{ 4, 4, 4, 4, 4, 4, 6, 6, 8, 10, 12,
16, 20, 24, 30, 38, 46, 56, 68, 84, 102, 26, }, /* 32000 */
{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 22050 */
{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
18, 22, 26, 32, 38, 46, 52, 64, 70, 76, 36, }, /* 24000 */
{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 16000 */
{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 11025 */
{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 12000 */
{ 12, 12, 12, 12, 12, 12, 16, 20, 24, 28, 32,
40, 48, 56, 64, 76, 90, 2, 2, 2, 2, 2, }, /* 8000 */
};
/* Widths of the 13 bands used for short blocks, one row per sample rate
 * in the same row order as band_size_long. Every row sums to 192. The
 * 16000, 11025 and 12000 rows are identical. */
static const uint8_t band_size_short[9][13] = {
{ 4, 4, 4, 4, 6, 8, 10, 12, 14, 18, 22, 30, 56, }, /* 44100 */
{ 4, 4, 4, 4, 6, 6, 10, 12, 14, 16, 20, 26, 66, }, /* 48000 */
{ 4, 4, 4, 4, 6, 8, 12, 16, 20, 26, 34, 42, 12, }, /* 32000 */
{ 4, 4, 4, 6, 6, 8, 10, 14, 18, 26, 32, 42, 18, }, /* 22050 */
{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 32, 44, 12, }, /* 24000 */
{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 30, 40, 18, }, /* 16000 */
{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 30, 40, 18, }, /* 11025 */
{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 30, 40, 18, }, /* 12000 */
{ 8, 8, 8, 12, 16, 20, 24, 28, 36, 2, 2, 2, 26, }, /* 8000 */
};
/* Two rows of 22 per-band values: row 0 is all zero, row 1 is non-zero
 * only for bands 11..20. Presumably the row is selected by a per-granule
 * "preflag" and the value is added to the band's scale factor -- TODO
 * confirm at the use site. */
static const uint8_t mpa_pretab[2][22] = {
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 2, 0 },
};
/* table for alias reduction (XXX: store it as integer !) */
/* Eight negative coefficients whose magnitude decreases with the index.
 * How they are turned into the alias-reduction butterfly weights is not
 * visible here -- see the code that consumes ci_table. */
static const float ci_table[8] = {
-0.6, -0.535, -0.33, -0.185, -0.095, -0.041, -0.0142, -0.0037,
};
#endif /* AVCODEC_MPEGAUDIODECTAB_H */

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2011 Mans Rullgard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/thread.h"
#include "mpegaudiodsp.h"
#include "dct.h"
#include "dct32.h"
static AVOnce mpadsp_float_table_init = AV_ONCE_INIT;
static AVOnce mpadsp_fixed_table_init = AV_ONCE_INIT;
/**
 * Fill an MPADSPContext with the C implementations of every routine, then
 * give each enabled architecture-specific initializer a chance to run on
 * the same context.
 *
 * The shared float and fixed-point tables are built at most once per
 * process through ff_thread_once().
 *
 * @param s context to initialize; all six function pointers are written
 */
av_cold void ff_mpadsp_init(MPADSPContext *s)
{
    DCTContext dct32_ctx;

    /* A 2^5-point DCT-II context is set up only to borrow its dct32 entry. */
    ff_dct_init(&dct32_ctx, 5, DCT_II);

    /* one-time table construction for both sample formats */
    ff_thread_once(&mpadsp_float_table_init, &ff_init_mpadsp_tabs_float);
    ff_thread_once(&mpadsp_fixed_table_init, &ff_init_mpadsp_tabs_fixed);

    /* float entry points */
    s->apply_window_float   = ff_mpadsp_apply_window_float;
    s->dct32_float          = dct32_ctx.dct32;
    s->imdct36_blocks_float = ff_imdct36_blocks_float;

    /* fixed-point entry points */
    s->apply_window_fixed   = ff_mpadsp_apply_window_fixed;
    s->dct32_fixed          = ff_dct32_fixed;
    s->imdct36_blocks_fixed = ff_imdct36_blocks_fixed;

    /* architecture hooks run last, in this order */
    if (ARCH_AARCH64)
        ff_mpadsp_init_aarch64(s);
    if (ARCH_ARM)
        ff_mpadsp_init_arm(s);
    if (ARCH_PPC)
        ff_mpadsp_init_ppc(s);
    if (ARCH_X86)
        ff_mpadsp_init_x86(s);
    if (HAVE_MIPSFPU)
        ff_mpadsp_init_mipsfpu(s);
    if (HAVE_MIPSDSP)
        ff_mpadsp_init_mipsdsp(s);
}

View File

@ -0,0 +1,94 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_MPEGAUDIODSP_H
#define AVCODEC_MPEGAUDIODSP_H
#include <stddef.h>
#include <stdint.h>
#include "libavutil/common.h"
/**
 * Table of function pointers for the MPEG audio DSP routines, with one
 * float and one fixed-point variant of each. ff_mpadsp_init() assigns all
 * six members.
 */
typedef struct MPADSPContext {
/* Windowing stage; called by ff_mpa_synth_filter_float() right after
 * dct32_float. incr is the distance between consecutive output samples. */
void (*apply_window_float)(float *synth_buf, float *window,
int *dither_state, float *samples,
ptrdiff_t incr);
/* Fixed-point counterpart: int32_t buffers in, int16_t samples out. */
void (*apply_window_fixed)(int32_t *synth_buf, int32_t *window,
int *dither_state, int16_t *samples,
ptrdiff_t incr);
/* 32-point DCT from src into dst; the first step of the synthesis filter. */
void (*dct32_float)(float *dst, const float *src);
void (*dct32_fixed)(int *dst, const int *src);
/* Runs the 36-point IMDCT on count consecutive blocks; switch_point and
 * block_type select the window used for each block. */
void (*imdct36_blocks_float)(float *out, float *buf, float *in,
int count, int switch_point, int block_type);
void (*imdct36_blocks_fixed)(int *out, int *buf, int *in,
int count, int switch_point, int block_type);
} MPADSPContext;
void ff_mpadsp_init(MPADSPContext *s);
extern int32_t ff_mpa_synth_window_fixed[];
extern float ff_mpa_synth_window_float[];
extern const int32_t ff_mpa_enwindow[257];
void ff_mpa_synth_filter_fixed(MPADSPContext *s,
int32_t *synth_buf_ptr, int *synth_buf_offset,
int32_t *window, int *dither_state,
int16_t *samples, ptrdiff_t incr,
int32_t *sb_samples);
void ff_mpa_synth_filter_float(MPADSPContext *s,
float *synth_buf_ptr, int *synth_buf_offset,
float *window, int *dither_state,
float *samples, ptrdiff_t incr,
float *sb_samples);
void ff_mpadsp_init_aarch64(MPADSPContext *s);
void ff_mpadsp_init_arm(MPADSPContext *s);
void ff_mpadsp_init_ppc(MPADSPContext *s);
void ff_mpadsp_init_x86(MPADSPContext *s);
void ff_mpadsp_init_mipsfpu(MPADSPContext *s);
void ff_mpadsp_init_mipsdsp(MPADSPContext *s);
void ff_mpa_synth_init_float(float *window);
void ff_mpa_synth_init_fixed(int32_t *window);
void ff_mpadsp_apply_window_float(float *synth_buf, float *window,
int *dither_state, float *samples,
ptrdiff_t incr);
void ff_mpadsp_apply_window_fixed(int32_t *synth_buf, int32_t *window,
int *dither_state, int16_t *samples,
ptrdiff_t incr);
void ff_imdct36_blocks_float(float *out, float *buf, float *in,
int count, int switch_point, int block_type);
void ff_imdct36_blocks_fixed(int *out, int *buf, int *in,
int count, int switch_point, int block_type);
void ff_init_mpadsp_tabs_float(void);
void ff_init_mpadsp_tabs_fixed(void);
/** For SSE implementation, MDCT_BUF_SIZE/2 should be 128-bit aligned */
#define MDCT_BUF_SIZE FFALIGN(36, 2*4)
extern int ff_mdct_win_fixed[8][MDCT_BUF_SIZE];
extern float ff_mdct_win_float[8][MDCT_BUF_SIZE];
#endif /* AVCODEC_MPEGAUDIODSP_H */

View File

@ -0,0 +1,56 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "mpegaudiodsp.h"
/* half mpeg encoding window (full precision) */
/* 257 integer coefficients (indices 0..256). ff_mpa_synth_init() expands
 * them into a 512-entry window: entry i is copied to window[i] and, for
 * i != 0, mirrored to window[512 - i], negated unless i is a multiple
 * of 64. */
const int32_t ff_mpa_enwindow[257] = {
0, -1, -1, -1, -1, -1, -1, -2,
-2, -2, -2, -3, -3, -4, -4, -5,
-5, -6, -7, -7, -8, -9, -10, -11,
-13, -14, -16, -17, -19, -21, -24, -26,
-29, -31, -35, -38, -41, -45, -49, -53,
-58, -63, -68, -73, -79, -85, -91, -97,
-104, -111, -117, -125, -132, -139, -147, -154,
-161, -169, -176, -183, -190, -196, -202, -208,
213, 218, 222, 225, 227, 228, 228, 227,
224, 221, 215, 208, 200, 189, 177, 163,
146, 127, 106, 83, 57, 29, -2, -36,
-72, -111, -153, -197, -244, -294, -347, -401,
-459, -519, -581, -645, -711, -779, -848, -919,
-991, -1064, -1137, -1210, -1283, -1356, -1428, -1498,
-1567, -1634, -1698, -1759, -1817, -1870, -1919, -1962,
-2001, -2032, -2057, -2075, -2085, -2087, -2080, -2063,
2037, 2000, 1952, 1893, 1822, 1739, 1644, 1535,
1414, 1280, 1131, 970, 794, 605, 402, 185,
-45, -288, -545, -814, -1095, -1388, -1692, -2006,
-2330, -2663, -3004, -3351, -3705, -4063, -4425, -4788,
-5153, -5517, -5879, -6237, -6589, -6935, -7271, -7597,
-7910, -8209, -8491, -8755, -8998, -9219, -9416, -9585,
-9727, -9838, -9916, -9959, -9966, -9935, -9863, -9750,
-9592, -9389, -9139, -8840, -8492, -8092, -7640, -7134,
6574, 5959, 5288, 4561, 3776, 2935, 2037, 1082,
70, -998, -2122, -3300, -4533, -5818, -7154, -8540,
-9975,-11455,-12980,-14548,-16155,-17799,-19478,-21189,
-22929,-24694,-26482,-28289,-30112,-31947,-33791,-35640,
-37489,-39336,-41176,-43006,-44821,-46617,-48390,-50137,
-51853,-53534,-55178,-56778,-58333,-59838,-61289,-62684,
-64019,-65290,-66494,-67629,-68692,-69679,-70590,-71420,
-72169,-72835,-73415,-73908,-74313,-74630,-74856,-74992,
75038,
};

View File

@ -0,0 +1,20 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define USE_FLOATS 0
#include "mpegaudiodsp_template.c"

View File

@ -0,0 +1,20 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define USE_FLOATS 1
#include "mpegaudiodsp_template.c"

View File

@ -0,0 +1,401 @@
/*
* Copyright (c) 2001, 2002 Fabrice Bellard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/mem.h"
#include "dct32.h"
#include "mathops.h"
#include "mpegaudiodsp.h"
#include "mpegaudio.h"
#if USE_FLOATS
#define RENAME(n) n##_float
/**
 * Float variant: hand back the accumulated value unchanged and reset the
 * accumulator to zero (no rounding residue is kept in float mode).
 *
 * @param sum accumulator; cleared on return
 * @return the value the accumulator held on entry
 */
static inline float round_sample(float *sum)
{
    const float value = *sum;

    *sum = 0;
    return value;
}
#define MACS(rt, ra, rb) rt+=(ra)*(rb)
#define MULS(ra, rb) ((ra)*(rb))
#define MULH3(x, y, s) ((s)*(y)*(x))
#define MLSS(rt, ra, rb) rt-=(ra)*(rb)
#define MULLx(x, y, s) ((y)*(x))
#define FIXHR(x) ((float)(x))
#define FIXR(x) ((float)(x))
#define SHR(a,b) ((a)*(1.0f/(1<<(b))))
#else
#define RENAME(n) n##_fixed
#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
/**
 * Fixed-point variant: shift the accumulator down by OUT_SHIFT, clip the
 * result to the int16 range, and leave only the low OUT_SHIFT bits in the
 * accumulator so they carry over into the next sample.
 *
 * @param sum 64-bit accumulator; reduced to its low OUT_SHIFT bits on return
 * @return the shifted accumulator clipped by av_clip_int16()
 */
static inline int round_sample(int64_t *sum)
{
    const int sample = (int)((*sum) >> OUT_SHIFT);

    *sum &= (1<<OUT_SHIFT)-1;
    return av_clip_int16(sample);
}
# define MULS(ra, rb) MUL64(ra, rb)
# define MACS(rt, ra, rb) MAC64(rt, ra, rb)
# define MLSS(rt, ra, rb) MLS64(rt, ra, rb)
# define MULH3(x, y, s) MULH((s)*(x), y)
# define MULLx(x, y, s) MULL((int)(x),(y),s)
# define SHR(a,b) (((int)(a))>>(b))
# define FIXR(a) ((int)((a) * FRAC_ONE + 0.5))
# define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5))
#endif
/** Window for MDCT. Only the elements in [0,17] and
[MDCT_BUF_SIZE/2, MDCT_BUF_SIZE/2 + 17] are actually used. The rest
is just to preserve alignment for SIMD implementations.
*/
DECLARE_ALIGNED(16, INTFLOAT, RENAME(ff_mdct_win))[8][MDCT_BUF_SIZE];
DECLARE_ALIGNED(16, MPA_INT, RENAME(ff_mpa_synth_window))[512+256];
#define SUM8(op, sum, w, p) \
{ \
op(sum, (w)[0 * 64], (p)[0 * 64]); \
op(sum, (w)[1 * 64], (p)[1 * 64]); \
op(sum, (w)[2 * 64], (p)[2 * 64]); \
op(sum, (w)[3 * 64], (p)[3 * 64]); \
op(sum, (w)[4 * 64], (p)[4 * 64]); \
op(sum, (w)[5 * 64], (p)[5 * 64]); \
op(sum, (w)[6 * 64], (p)[6 * 64]); \
op(sum, (w)[7 * 64], (p)[7 * 64]); \
}
#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
{ \
INTFLOAT tmp;\
tmp = p[0 * 64];\
op1(sum1, (w1)[0 * 64], tmp);\
op2(sum2, (w2)[0 * 64], tmp);\
tmp = p[1 * 64];\
op1(sum1, (w1)[1 * 64], tmp);\
op2(sum2, (w2)[1 * 64], tmp);\
tmp = p[2 * 64];\
op1(sum1, (w1)[2 * 64], tmp);\
op2(sum2, (w2)[2 * 64], tmp);\
tmp = p[3 * 64];\
op1(sum1, (w1)[3 * 64], tmp);\
op2(sum2, (w2)[3 * 64], tmp);\
tmp = p[4 * 64];\
op1(sum1, (w1)[4 * 64], tmp);\
op2(sum2, (w2)[4 * 64], tmp);\
tmp = p[5 * 64];\
op1(sum1, (w1)[5 * 64], tmp);\
op2(sum2, (w2)[5 * 64], tmp);\
tmp = p[6 * 64];\
op1(sum1, (w1)[6 * 64], tmp);\
op2(sum2, (w2)[6 * 64], tmp);\
tmp = p[7 * 64];\
op1(sum1, (w1)[7 * 64], tmp);\
op2(sum2, (w2)[7 * 64], tmp);\
}
/**
 * Apply the synthesis window to synth_buf and emit 32 output samples.
 *
 * @param synth_buf    input buffer; its first 32 entries are duplicated at
 *                     offset 512, so it needs room for at least 512 + 32
 *                     entries
 * @param window       window coefficients
 * @param dither_state seeds the accumulator on entry and receives what is
 *                     left in the accumulator on exit
 * @param samples      output; samples[k * incr] is written for k = 0..31
 * @param incr         distance between consecutive output samples
 */
void RENAME(ff_mpadsp_apply_window)(MPA_INT *synth_buf, MPA_INT *window,
int *dither_state, OUT_INT *samples,
ptrdiff_t incr)
{
register const MPA_INT *w, *w2, *p;
int j;
OUT_INT *samples2;
#if USE_FLOATS
float sum, sum2;
#else
int64_t sum, sum2;
#endif
/* copy to avoid wrap */
memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf));
/* samples walks forward from output 0, samples2 backward from output 31 */
samples2 = samples + 31 * incr;
w = window;
w2 = window + 31;
sum = *dither_state;
/* output 0: 8 taps added from synth_buf + 16, 8 taps subtracted from synth_buf + 48 */
p = synth_buf + 16;
SUM8(MACS, sum, w, p);
p = synth_buf + 48;
SUM8(MLSS, sum, w + 32, p);
*samples = round_sample(&sum);
samples += incr;
w++;
/* we calculate two samples at the same time to avoid one memory
access per two sample */
/* iteration j produces outputs j (via sum) and 32 - j (via sum2) */
for(j=1;j<16;j++) {
sum2 = 0;
p = synth_buf + 16 + j;
SUM8P2(sum, MACS, sum2, MLSS, w, w2, p);
p = synth_buf + 48 - j;
SUM8P2(sum, MLSS, sum2, MLSS, w + 32, w2 + 32, p);
*samples = round_sample(&sum);
samples += incr;
/* whatever round_sample() left in sum is folded into the mirrored output */
sum += sum2;
*samples2 = round_sample(&sum);
samples2 -= incr;
w++;
w2--;
}
/* output 16: only the subtracted half contributes */
p = synth_buf + 32;
SUM8(MLSS, sum, w + 32, p);
*samples = round_sample(&sum);
*dither_state= sum;
}
/**
 * 32 sub band synthesis filter: takes 32 sub band samples and produces
 * 32 output samples.
 *
 * Runs the context's dct32 on sb_samples, writing into the synthesis
 * buffer at the current offset, applies the window to that position, then
 * moves the offset back by 32 entries modulo 512.
 *
 * @param s                context providing dct32 and apply_window
 * @param synth_buf_ptr    base of the synthesis buffer
 * @param synth_buf_offset current offset into the buffer; updated on return
 * @param window           window coefficients passed to apply_window
 * @param dither_state     passed through to apply_window
 * @param samples          output samples, spaced by incr
 * @param incr             distance between consecutive output samples
 * @param sb_samples       the 32 input sub band samples
 */
void RENAME(ff_mpa_synth_filter)(MPADSPContext *s, MPA_INT *synth_buf_ptr,
                                 int *synth_buf_offset,
                                 MPA_INT *window, int *dither_state,
                                 OUT_INT *samples, ptrdiff_t incr,
                                 MPA_INT *sb_samples)
{
    const int cur = *synth_buf_offset;
    MPA_INT *const dst = synth_buf_ptr + cur;

    s->RENAME(dct32)(dst, sb_samples);
    s->RENAME(apply_window)(dst, window, dither_state, samples, incr);

    /* step back one 32-entry slot, wrapping inside the 512-entry ring */
    *synth_buf_offset = (cur - 32) & 511;
}
/**
 * Build the synthesis window from ff_mpa_enwindow.
 *
 * Entries 0..511 hold the half window and its mirror image; entries
 * 512..767 hold re-ordered copies of selected coefficients.
 *
 * @param window destination with room for at least 512 + 256 entries
 */
av_cold void RENAME(ff_mpa_synth_init)(MPA_INT *window)
{
    int n, blk, k;

    /* max = 18760, max sum over all 16 coefs : 44736 */
    for (n = 0; n <= 256; n++) {
        INTFLOAT coef = ff_mpa_enwindow[n];
#if USE_FLOATS
        coef *= 1.0 / (1LL<<(16 + FRAC_BITS));
#endif
        window[n] = coef;
        if (n == 0)
            continue;
        /* mirror into the upper half; the sign flips unless n is a multiple of 64 */
        window[512 - n] = (n & 63) ? -coef : coef;
    }

    // Needed for avoiding shuffles in ASM implementations
    for (blk = 0; blk < 8; blk++) {
        for (k = 0; k < 16; k++) {
            /* both sources lie below index 512, both destinations at or above it */
            window[512 + 16*blk + k]       = window[64*blk + 32 - k];
            window[512 + 128 + 16*blk + k] = window[64*blk + 48 - k];
        }
    }
}
/**
 * Fill the eight MDCT window tables.
 *
 * Rows 0..3 are computed from sine shapes, one row per window type, with
 * the last IMDCT stage folded into the coefficients. Rows 4..7 are copies
 * of rows 0..3 with every odd-indexed coefficient negated.
 */
av_cold void RENAME(ff_init_mpadsp_tabs)(void)
{
    int type, n;

    /* compute mdct windows */
    for (type = 0; type < 4; type++) {
        for (n = 0; n < 36; n++) {
            double w;

            /* window type 2 only keeps every third tap (n = 1, 4, 7, ...) */
            if (type == 2 && n % 3 != 1)
                continue;

            w = sin(M_PI * (n + 0.5) / 36.0);
            switch (type) {
            case 1:
                /* second half: flat, then a short sine ramp, then zero */
                if (n >= 30)
                    w = 0;
                else if (n >= 24)
                    w = sin(M_PI * (n - 18 + 0.5) / 12.0);
                else if (n >= 18)
                    w = 1;
                break;
            case 3:
                /* first half: zero, then a short sine ramp, then flat */
                if (n < 6)
                    w = 0;
                else if (n < 12)
                    w = sin(M_PI * (n - 6 + 0.5) / 12.0);
                else if (n < 18)
                    w = 1;
                break;
            default:
                break;
            }

            // merge last stage of imdct into the window coefficients
            w *= 0.5 * IMDCT_SCALAR / cos(M_PI * (2 * n + 19) / 72);

            if (type == 2) {
                RENAME(ff_mdct_win)[type][n/3] = FIXHR((w / (1<<5)));
            } else {
                /* the second 18 taps are stored starting at MDCT_BUF_SIZE/2 */
                int idx = n < 18 ? n : n + (MDCT_BUF_SIZE/2 - 18);
                RENAME(ff_mdct_win)[type][idx] = FIXHR((w / (1<<5)));
            }
        }
    }

    /* NOTE: we do frequency inversion after the MDCT by changing
       the sign of the right window coefs */
    for (type = 0; type < 4; type++) {
        for (n = 0; n < MDCT_BUF_SIZE; n += 2) {
            RENAME(ff_mdct_win)[type + 4][n]     =  RENAME(ff_mdct_win)[type][n];
            RENAME(ff_mdct_win)[type + 4][n + 1] = -RENAME(ff_mdct_win)[type][n + 1];
        }
    }
}
/* Ci = cos(pi*i/18) / 2, converted with FIXHR */
#define C1 FIXHR(0.98480775301220805936/2)
#define C2 FIXHR(0.93969262078590838405/2)
#define C3 FIXHR(0.86602540378443864676/2)
#define C4 FIXHR(0.76604444311897803520/2)
#define C5 FIXHR(0.64278760968653932632/2)
#define C6 FIXHR(0.5/2)
#define C7 FIXHR(0.34202014332566873304/2)
#define C8 FIXHR(0.17364817766693034885/2)
/* 0.5 / cos(pi*(2*i+1)/36) */
/* Nine entries, i = 0..8, converted with FIXR. imdct36() reads entries
 * 8 - j for j = 0..3, i.e. only indices 5..8. */
static const INTFLOAT icos36[9] = {
FIXR(0.50190991877167369479),
FIXR(0.51763809020504152469), //0
FIXR(0.55168895948124587824),
FIXR(0.61038729438072803416),
FIXR(0.70710678118654752439), //1
FIXR(0.87172339781054900991),
FIXR(1.18310079157624925896),
FIXR(1.93185165257813657349), //2
FIXR(5.73685662283492756461),
};
/* 0.5 / cos(pi*(2*i+1)/36) */
/* Same constants as icos36 but pre-scaled and converted with FIXHR:
 * entries 0..5 are divided by 2 and entries 6..7 by 4. The ninth
 * initializer is commented out, so entry 8 is zero-initialized.
 * imdct36() reads indices 0..4 only. */
static const INTFLOAT icos36h[9] = {
FIXHR(0.50190991877167369479/2),
FIXHR(0.51763809020504152469/2), //0
FIXHR(0.55168895948124587824/2),
FIXHR(0.61038729438072803416/2),
FIXHR(0.70710678118654752439/2), //1
FIXHR(0.87172339781054900991/2),
FIXHR(1.18310079157624925896/4),
FIXHR(1.93185165257813657349/4), //2
// FIXHR(5.73685662283492756461),
};
/* using Lee like decomposition followed by hand coded 9 points DCT */
/**
 * 36-point IMDCT of one block of 18 coefficients, with windowing and
 * overlap-add.
 *
 * @param out output; out[k * SBLIMIT] is written for k = 0..17
 * @param buf overlap buffer, accessed at buf[4 * k] for k = 0..17; the old
 *            value is added into out and then replaced by the windowed
 *            second half of this block
 * @param in  18 input coefficients; MODIFIED in place by the two prefix-sum
 *            loops at the top
 * @param win window; taps 0..17 weight the part written to out, taps
 *            MDCT_BUF_SIZE/2 + 0..17 weight the part stored in buf
 */
static void imdct36(INTFLOAT *out, INTFLOAT *buf, SUINTFLOAT *in, INTFLOAT *win)
{
int i, j;
SUINTFLOAT t0, t1, t2, t3, s0, s1, s2, s3;
SUINTFLOAT tmp[18], *tmp1, *in1;
/* in-place running sums: every entry absorbs its predecessor ... */
for (i = 17; i >= 1; i--)
in[i] += in[i-1];
/* ... then every odd entry from 3 upward absorbs the odd entry two below */
for (i = 17; i >= 3; i -= 2)
in[i] += in[i-2];
/* two interleaved 9-point transforms: j = 0 handles the even-indexed
inputs and fills the even slots of tmp, j = 1 the odd ones */
for (j = 0; j < 2; j++) {
tmp1 = tmp + j;
in1 = in + j;
t2 = in1[2*4] + in1[2*8] - in1[2*2];
t3 = in1[2*0] + SHR(in1[2*6],1);
t1 = in1[2*0] - in1[2*6];
tmp1[ 6] = t1 - SHR(t2,1);
tmp1[16] = t1 + t2;
t0 = MULH3(in1[2*2] + in1[2*4] , C2, 2);
t1 = MULH3(in1[2*4] - in1[2*8] , -2*C8, 1);
t2 = MULH3(in1[2*2] + in1[2*8] , -C4, 2);
tmp1[10] = t3 - t0 - t2;
tmp1[ 2] = t3 + t0 + t1;
tmp1[14] = t3 + t2 - t1;
tmp1[ 4] = MULH3(in1[2*5] + in1[2*7] - in1[2*1], -C3, 2);
t2 = MULH3(in1[2*1] + in1[2*5], C1, 2);
t3 = MULH3(in1[2*5] - in1[2*7], -2*C7, 1);
t0 = MULH3(in1[2*3], C3, 2);
t1 = MULH3(in1[2*1] + in1[2*7], -C5, 2);
tmp1[ 0] = t2 + t3 + t0;
tmp1[12] = t2 + t1 - t0;
tmp1[ 8] = t3 - t1 - t0;
}
/* combine tmp[] four entries at a time; each pass yields four windowed
outputs (positions 9+j, 8-j, 17-j, j) and the four matching buf updates */
i = 0;
for (j = 0; j < 4; j++) {
t0 = tmp[i];
t1 = tmp[i + 2];
s0 = t1 + t0;
s2 = t1 - t0;
t2 = tmp[i + 1];
t3 = tmp[i + 3];
s1 = MULH3(t3 + t2, icos36h[ j], 2);
s3 = MULLx(t3 - t2, icos36 [8 - j], FRAC_BITS);
t0 = s0 + s1;
t1 = s0 - s1;
out[(9 + j) * SBLIMIT] = MULH3(t1, win[ 9 + j], 1) + buf[4*(9 + j)];
out[(8 - j) * SBLIMIT] = MULH3(t1, win[ 8 - j], 1) + buf[4*(8 - j)];
buf[4 * ( 9 + j )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + j], 1);
buf[4 * ( 8 - j )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 8 - j], 1);
t0 = s2 + s3;
t1 = s2 - s3;
out[(9 + 8 - j) * SBLIMIT] = MULH3(t1, win[ 9 + 8 - j], 1) + buf[4*(9 + 8 - j)];
out[ j * SBLIMIT] = MULH3(t1, win[ j], 1) + buf[4*( j)];
buf[4 * ( 9 + 8 - j )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + 8 - j], 1);
buf[4 * ( j )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + j], 1);
i += 4;
}
/* the remaining pair tmp[16], tmp[17] gives the two middle outputs (13 and 4) */
s0 = tmp[16];
s1 = MULH3(tmp[17], icos36h[4], 2);
t0 = s0 + s1;
t1 = s0 - s1;
out[(9 + 4) * SBLIMIT] = MULH3(t1, win[ 9 + 4], 1) + buf[4*(9 + 4)];
out[(8 - 4) * SBLIMIT] = MULH3(t1, win[ 8 - 4], 1) + buf[4*(8 - 4)];
buf[4 * ( 9 + 4 )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + 4], 1);
buf[4 * ( 8 - 4 )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 8 - 4], 1);
}
/**
 * Run imdct36() on count consecutive blocks of 18 coefficients.
 *
 * @param out          output; advanced by one entry per block
 * @param buf          overlap buffer; advanced by 1 per block, except
 *                     after every fourth block, where it advances by 72 - 3
 * @param in           input coefficients, 18 per block (modified by imdct36)
 * @param count        number of blocks to process
 * @param switch_point when non-zero, the first two blocks use window type 0
 * @param block_type   window type for every other block
 */
void RENAME(ff_imdct36_blocks)(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in,
                               int count, int switch_point, int block_type)
{
    int blk;

    for (blk = 0; blk < count; blk++, out++, in += 18) {
        /* apply window & overlap with previous buffer */
        /* select window */
        const int win_idx = (switch_point && blk < 2) ? 0 : block_type;
        /* odd blocks use the sign-alternated copy stored four rows further on */
        const int odd_off = (blk & 1) ? 4 : 0;
        INTFLOAT *win = RENAME(ff_mdct_win)[win_idx + odd_off];

        imdct36(out, buf, in, win);

        buf += ((blk & 3) != 3) ? 1 : (72 - 3);
    }
}

View File

@ -0,0 +1,253 @@
/*
* Simple IDCT
*
* Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* simpleidct in C.
*/
#include "libavutil/intreadwrite.h"
#include "avcodec.h"
#include "mathops.h"
#include "simple_idct.h"
#define IN_IDCT_DEPTH 16
#define BIT_DEPTH 8
#include "simple_idct_template.c"
#undef BIT_DEPTH
#define BIT_DEPTH 10
#include "simple_idct_template.c"
#define EXTRA_SHIFT 2
#include "simple_idct_template.c"
#undef EXTRA_SHIFT
#undef BIT_DEPTH
#define BIT_DEPTH 12
#include "simple_idct_template.c"
#undef BIT_DEPTH
#undef IN_IDCT_DEPTH
#define IN_IDCT_DEPTH 32
#define BIT_DEPTH 10
#include "simple_idct_template.c"
#undef BIT_DEPTH
#undef IN_IDCT_DEPTH
/* 2x4x8 idct */
#define CN_SHIFT 12
#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)
/* row idct is multiplied by 16 * sqrt(2.0), col idct4 is normalized,
and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
#define C_SHIFT (4+1+12)
static inline void idct4col_put(uint8_t *dest, ptrdiff_t line_size, const int16_t *col)
{
int c0, c1, c2, c3, a0, a1, a2, a3;
a0 = col[8*0];
a1 = col[8*2];
a2 = col[8*4];
a3 = col[8*6];
c0 = ((a0 + a2) * (1 << CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
c2 = ((a0 - a2) * (1 << CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
c1 = a1 * C1 + a3 * C2;
c3 = a1 * C2 - a3 * C1;
dest[0] = av_clip_uint8((c0 + c1) >> C_SHIFT);
dest += line_size;
dest[0] = av_clip_uint8((c2 + c3) >> C_SHIFT);
dest += line_size;
dest[0] = av_clip_uint8((c2 - c3) >> C_SHIFT);
dest += line_size;
dest[0] = av_clip_uint8((c0 - c1) >> C_SHIFT);
}
/* In-place butterfly on entry k of the two consecutive 8-entry rows at ptr:
   ptr[k] receives the sum and ptr[8 + k] the difference. Expects a pointer
   named ptr in the enclosing scope. */
#define BF(k) \
    do {\
        const int bf_upper = ptr[(k)];\
        const int bf_lower = ptr[8 + (k)];\
        ptr[(k)]     = bf_upper + bf_lower;\
        ptr[8 + (k)] = bf_upper - bf_lower;\
    } while (0)
/* only used by DV codec. The input must be interlaced. 128 is added
   to the pixels before clamping to avoid systematic error
   (1024*sqrt(2)) offset would be needed otherwise. */
/* XXX: I think a 1.0/sqrt(2) normalization should be needed to
   compensate the extra butterfly stage - I don't have the full DV
   specification */
/**
 * 2x4x8 IDCT with store: butterflies each pair of adjacent rows, runs the
 * 8-point row IDCT on all eight rows, then runs a 4-point column IDCT twice
 * per column, writing to alternating output lines of dest.
 */
void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int16_t *ptr = block;
    int pair, k, row, x;

    /* butterfly: combine the two rows of each of the four row pairs */
    for (pair = 0; pair < 4; pair++) {
        for (k = 0; k < 8; k++) {
            BF(k);
        }
        ptr += 2 * 8;
    }

    /* IDCT8 on each line */
    for (row = 0; row < 8; row++) {
        idctRowCondDC_int16_8bit(block + row * 8, 0);
    }

    /* IDCT4 and store: even block rows go to even output lines, odd block
       rows to odd output lines */
    for (x = 0; x < 8; x++) {
        idct4col_put(dest + x, 2 * line_size, block + x);
        idct4col_put(dest + line_size + x, 2 * line_size, block + 8 + x);
    }
}
/* 8x4 & 4x8 WMV2 IDCT */
/* Drop the constants of the 2x4x8 IDCT and redefine them with an additional
   sqrt(2) factor folded into every coefficient. */
#undef CN_SHIFT
#undef C_SHIFT
#undef C_FIX
#undef C1
#undef C2
#define CN_SHIFT 12
#define C_FIX(x) ((int)((x) * M_SQRT2 * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)
#define C3 C_FIX(0.5)
#define C_SHIFT (4+1+12)

/**
 * 4-point column IDCT with accumulate.
 *
 * Reads the coefficients in the first four rows of the column at col (8-wide
 * block) and adds the four results to the samples already in dest, one every
 * line_size bytes, clipping each sum to 8 bits.
 */
static inline void idct4col_add(uint8_t *dest, ptrdiff_t line_size, const int16_t *col)
{
    const int in0 = col[8 * 0];
    const int in1 = col[8 * 1];
    const int in2 = col[8 * 2];
    const int in3 = col[8 * 3];
    const int rnd = 1 << (C_SHIFT - 1);

    /* even part plus rounding constant */
    const int even_sum  = (in0 + in2) * C3 + rnd;
    const int even_diff = (in0 - in2) * C3 + rnd;
    /* odd part */
    const int odd_outer = in1 * C1 + in3 * C2;
    const int odd_inner = in1 * C2 - in3 * C1;

    uint8_t *out = dest;

    out[0] = av_clip_uint8(out[0] + ((even_sum + odd_outer) >> C_SHIFT));
    out += line_size;
    out[0] = av_clip_uint8(out[0] + ((even_diff + odd_inner) >> C_SHIFT));
    out += line_size;
    out[0] = av_clip_uint8(out[0] + ((even_diff - odd_inner) >> C_SHIFT));
    out += line_size;
    out[0] = av_clip_uint8(out[0] + ((even_sum - odd_outer) >> C_SHIFT));
}
/* Fixed-point constants of the 4-point row IDCT: 15 fractional bits with a
   sqrt(2) factor folded in; results are shifted down by R_SHIFT. */
#define RN_SHIFT 15
#define R_FIX(x) ((int)((x) * M_SQRT2 * (1 << RN_SHIFT) + 0.5))
#define R1 R_FIX(0.6532814824)
#define R2 R_FIX(0.2705980501)
#define R3 R_FIX(0.5)
#define R_SHIFT 11

/**
 * In-place 4-point row IDCT on row[0..3].
 */
static inline void idct4row(int16_t *row)
{
    const int in0 = row[0];
    const int in1 = row[1];
    const int in2 = row[2];
    const int in3 = row[3];
    const int rnd = 1 << (R_SHIFT - 1);

    /* even part plus rounding constant */
    const int even_sum  = (in0 + in2) * R3 + rnd;
    const int even_diff = (in0 - in2) * R3 + rnd;
    /* odd part */
    const int odd_outer = in1 * R1 + in3 * R2;
    const int odd_inner = in1 * R2 - in3 * R1;

    row[0] = (even_sum  + odd_outer) >> R_SHIFT;
    row[1] = (even_diff + odd_inner) >> R_SHIFT;
    row[2] = (even_diff - odd_inner) >> R_SHIFT;
    row[3] = (even_sum  - odd_outer) >> R_SHIFT;
}
/**
 * 8x4 IDCT with accumulate: 8-point row IDCT on the first four rows of
 * block, then a 4-point column IDCT on each of the eight columns whose
 * result is added to dest.
 */
void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int16_t *row = block;
    int y, x;

    /* IDCT8 on each line */
    for (y = 0; y < 4; y++, row += 8) {
        idctRowCondDC_int16_8bit(row, 0);
    }

    /* IDCT4 and store */
    for (x = 0; x < 8; x++) {
        idct4col_add(dest + x, line_size, block + x);
    }
}
/**
 * 4x8 IDCT with accumulate: 4-point row IDCT on all eight rows of block,
 * then an 8-point column IDCT on each of the first four columns whose result
 * is added to dest.
 */
void ff_simple_idct48_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int16_t *row = block;
    int y, x;

    /* IDCT4 on each line */
    for (y = 0; y < 8; y++, row += 8) {
        idct4row(row);
    }

    /* IDCT8 and store */
    for (x = 0; x < 4; x++) {
        idctSparseColAdd_int16_8bit(dest + x, line_size, block + x);
    }
}
/**
 * 4x4 IDCT with accumulate: 4-point row IDCT on the first four rows of
 * block, then a 4-point column IDCT on each of the first four columns whose
 * result is added to dest.
 */
void ff_simple_idct44_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int16_t *row = block;
    int y, x;

    /* IDCT4 on each line */
    for (y = 0; y < 4; y++, row += 8) {
        idct4row(row);
    }

    /* IDCT4 and store */
    for (x = 0; x < 4; x++) {
        idct4col_add(dest + x, line_size, block + x);
    }
}
/**
 * Dequantize block with qmat and transform it in place using the
 * extra-shift variants of the 10-bit row and column passes.
 */
void ff_prores_idct(int16_t *block, const int16_t *qmat)
{
    int n, y, x;

    /* dequantization: element-wise multiply by the quantization matrix */
    for (n = 0; n < 64; n++) {
        block[n] *= qmat[n];
    }

    /* row pass, shifting down by two extra bits */
    for (y = 0; y < 8; y++) {
        idctRowCondDC_extrashift_10(block + y * 8, 2);
    }

    /* column pass; 8192 is added to the first entry of each column first */
    for (x = 0; x < 8; x++) {
        block[x] += 8192;
        idctSparseCol_extrashift_10(block + x);
    }
}

View File

@ -0,0 +1,63 @@
/*
* Simple IDCT
*
* Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* simple idct header.
*/
#ifndef AVCODEC_SIMPLE_IDCT_H
#define AVCODEC_SIMPLE_IDCT_H
#include <stddef.h>
#include <stdint.h>
/*
 * 8x8 IDCT entry points. The name suffix is _int<input depth>_<bit depth>bit.
 * For each configuration:
 *  - _put transforms block and stores the clipped result to dest,
 *  - _add transforms block and adds the result to the samples in dest,
 *  - the variant without dest transforms block in place.
 * line_size is given in bytes.
 */
void ff_simple_idct_put_int16_8bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_add_int16_8bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_int16_8bit(int16_t *block);
void ff_simple_idct_put_int16_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_add_int16_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_int16_10bit(int16_t *block);
/* NOTE(review): the template only emits the _put variant when IN_IDCT_DEPTH
 * is 32, so the _add and in-place int32 prototypes below appear to have no
 * definition in simple_idct.c - confirm before calling them. */
void ff_simple_idct_put_int32_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_add_int32_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_int32_10bit(int16_t *block);
void ff_simple_idct_put_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_add_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_int16_12bit(int16_t *block);
/**
 * Special version of ff_simple_idct_int16_10bit() which does dequantization
 * and scales by a factor of 2 more between the two IDCTs to account
 * for larger scale of input coefficients.
 */
void ff_prores_idct(int16_t *block, const int16_t *qmat);
/** 2x4x8 IDCT with store; per the implementation's comment, only used by the DV codec. */
void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
/** 8x4 IDCT (8-point rows, 4-point columns) whose result is added to dest. */
void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
/** 4x8 IDCT (4-point rows, 8-point columns) whose result is added to dest. */
void ff_simple_idct48_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
/** 4x4 IDCT (4-point rows and columns) whose result is added to dest. */
void ff_simple_idct44_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
#endif /* AVCODEC_SIMPLE_IDCT_H */

View File

@ -0,0 +1,371 @@
/*
* Simple IDCT
*
* Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* simpleidct in C.
*/
/* Based upon some commented-out C code from mpeg2dec (idct_mmx.c
* written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>). */
#include "simple_idct.h"
#include "bit_depth_template.c"
/* This file is included several times with different BIT_DEPTH,
 * IN_IDCT_DEPTH and EXTRA_SHIFT settings, so everything defined by a
 * previous inclusion is dropped first. */
#undef W1
#undef W2
#undef W3
#undef W4
#undef W5
#undef W6
#undef W7
#undef ROW_SHIFT
#undef COL_SHIFT
#undef DC_SHIFT
#undef MUL
#undef MAC
/* Select the cosine weights W1..W7, the row/column/DC shifts and the
 * multiply helpers for the requested configuration. */
#if BIT_DEPTH == 8
#define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define ROW_SHIFT 11
#define COL_SHIFT 20
#define DC_SHIFT 3
/* the 8-bit variant goes through the MUL16/MAC16 helpers */
#define MUL(a, b) MUL16(a, b)
#define MAC(a, b, c) MAC16(a, b, c)
#elif BIT_DEPTH == 10 || BIT_DEPTH == 12
# if BIT_DEPTH == 10
#define W1 22725 // 90901
#define W2 21407 // 85627
#define W3 19265 // 77062
#define W4 16384 // 65535
#define W5 12873 // 51491
#define W6 8867 // 35468
#define W7 4520 // 18081
/* the shifts depend on which 10-bit flavour is being instantiated */
# ifdef EXTRA_SHIFT
#define ROW_SHIFT 13
#define COL_SHIFT 18
#define DC_SHIFT 1
# elif IN_IDCT_DEPTH == 32
#define ROW_SHIFT 13
#define COL_SHIFT 21
#define DC_SHIFT 2
# else
#define ROW_SHIFT 12
#define COL_SHIFT 19
#define DC_SHIFT 2
# endif
# else
/* BIT_DEPTH == 12 */
#define W1 45451
#define W2 42813
#define W3 38531
#define W4 32767
#define W5 25746
#define W6 17734
#define W7 9041
#define ROW_SHIFT 16
#define COL_SHIFT 17
/* negative: the DC-only shortcut of the row pass then shifts right */
#define DC_SHIFT -1
# endif
/* the higher bit depths use plain C multiplies */
#define MUL(a, b) ((a) * (b))
#define MAC(a, b, c) ((a) += (b) * (c))
#else
#error "Unsupported bitdepth"
#endif
/*
 * Row pass: transforms the eight coefficients at row in place.
 * extra_shift is added to ROW_SHIFT for the final downshift (and subtracted
 * from DC_SHIFT in the DC-only shortcut). With EXTRA_SHIFT defined the
 * function is emitted under the _extrashift name and takes int16_t input;
 * otherwise it takes idctin input.
 */
#ifdef EXTRA_SHIFT
static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
#else
static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift)
#endif
{
SUINT a0, a1, a2, a3, b0, b1, b2, b3;
// TODO: Add DC-only support for int32_t input
#if IN_IDCT_DEPTH == 16
/* DC-only shortcut: when every coefficient except row[0] is zero, all eight
 * outputs equal the scaled DC value, which is replicated with wide stores. */
#if HAVE_FAST_64BIT
/* bits of the first 64-bit word that hold row[0]: shifted by 48 only when
 * HAVE_BIGENDIAN is 1 */
#define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN)
if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
uint64_t temp;
if (DC_SHIFT - extra_shift >= 0) {
temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
} else {
/* net right shift: add half of the divisor first so the result is rounded */
temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
}
/* replicate the 16-bit value into all four 16-bit lanes of temp */
temp += temp * (1 << 16);
temp += temp * ((uint64_t) 1 << 32);
AV_WN64A(row, temp);
AV_WN64A(row + 4, temp);
return;
}
#else
/* same shortcut using 32-bit loads and stores */
if (!(AV_RN32A(row+2) |
AV_RN32A(row+4) |
AV_RN32A(row+6) |
row[1])) {
uint32_t temp;
if (DC_SHIFT - extra_shift >= 0) {
temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
} else {
temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
}
/* replicate the 16-bit value into both halves of temp */
temp += temp * (1 << 16);
AV_WN32A(row, temp);
AV_WN32A(row+2, temp);
AV_WN32A(row+4, temp);
AV_WN32A(row+6, temp);
return;
}
#endif
#endif
/* even part (a0..a3): DC term plus rounding constant, then row[2] */
a0 = (W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
a1 = a0;
a2 = a0;
a3 = a0;
a0 += W2 * row[2];
a1 += W6 * row[2];
a2 -= W6 * row[2];
a3 -= W2 * row[2];
/* odd part (b0..b3): contributions of row[1] and row[3] */
b0 = MUL(W1, row[1]);
MAC(b0, W3, row[3]);
b1 = MUL(W3, row[1]);
MAC(b1, -W7, row[3]);
b2 = MUL(W5, row[1]);
MAC(b2, -W1, row[3]);
b3 = MUL(W7, row[1]);
MAC(b3, -W5, row[3]);
/* row[4..7] only contribute when at least one of them is non-zero; the
 * 32-bit input needs two 64-bit loads to cover all four of them */
#if IN_IDCT_DEPTH == 32
if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) {
#else
if (AV_RN64A(row + 4)) {
#endif
a0 += W4*row[4] + W6*row[6];
a1 += - W4*row[4] - W2*row[6];
a2 += - W4*row[4] + W2*row[6];
a3 += W4*row[4] - W6*row[6];
MAC(b0, W5, row[5]);
MAC(b0, W7, row[7]);
MAC(b1, -W1, row[5]);
MAC(b1, -W5, row[7]);
MAC(b2, W7, row[5]);
MAC(b2, W3, row[7]);
MAC(b3, W3, row[5]);
MAC(b3, -W1, row[7]);
}
/* combine even and odd parts; the sums are cast to int before the shift */
row[0] = (int)(a0 + b0) >> (ROW_SHIFT + extra_shift);
row[7] = (int)(a0 - b0) >> (ROW_SHIFT + extra_shift);
row[1] = (int)(a1 + b1) >> (ROW_SHIFT + extra_shift);
row[6] = (int)(a1 - b1) >> (ROW_SHIFT + extra_shift);
row[2] = (int)(a2 + b2) >> (ROW_SHIFT + extra_shift);
row[5] = (int)(a2 - b2) >> (ROW_SHIFT + extra_shift);
row[3] = (int)(a3 + b3) >> (ROW_SHIFT + extra_shift);
row[4] = (int)(a3 - b3) >> (ROW_SHIFT + extra_shift);
}
/*
 * Shared arithmetic of the column passes. Expects the variables a0..a3,
 * b0..b3 and the pointer col in the enclosing scope. It reads the eight
 * coefficients col[8*0] .. col[8*7] and leaves the even part in a0..a3
 * (including the rounding term derived from COL_SHIFT) and the odd part in
 * b0..b3; the caller combines them and shifts by COL_SHIFT. Coefficients 4
 * to 7 are skipped when they are zero.
 */
#define IDCT_COLS do { \
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); \
a1 = a0; \
a2 = a0; \
a3 = a0; \
\
a0 += W2*col[8*2]; \
a1 += W6*col[8*2]; \
a2 += -W6*col[8*2]; \
a3 += -W2*col[8*2]; \
\
b0 = MUL(W1, col[8*1]); \
b1 = MUL(W3, col[8*1]); \
b2 = MUL(W5, col[8*1]); \
b3 = MUL(W7, col[8*1]); \
\
MAC(b0, W3, col[8*3]); \
MAC(b1, -W7, col[8*3]); \
MAC(b2, -W1, col[8*3]); \
MAC(b3, -W5, col[8*3]); \
\
if (col[8*4]) { \
a0 += W4*col[8*4]; \
a1 += -W4*col[8*4]; \
a2 += -W4*col[8*4]; \
a3 += W4*col[8*4]; \
} \
\
if (col[8*5]) { \
MAC(b0, W5, col[8*5]); \
MAC(b1, -W1, col[8*5]); \
MAC(b2, W7, col[8*5]); \
MAC(b3, W3, col[8*5]); \
} \
\
if (col[8*6]) { \
a0 += W6*col[8*6]; \
a1 += -W2*col[8*6]; \
a2 += W2*col[8*6]; \
a3 += -W6*col[8*6]; \
} \
\
if (col[8*7]) { \
MAC(b0, W7, col[8*7]); \
MAC(b1, -W5, col[8*7]); \
MAC(b2, W3, col[8*7]); \
MAC(b3, -W1, col[8*7]); \
} \
} while (0)
#ifdef EXTRA_SHIFT
static inline void FUNC(idctSparseCol_extrashift)(int16_t *col)
#else
/**
 * Column pass with store: runs IDCT_COLS on the column starting at col and
 * writes the eight clipped results down one column of dest, one every
 * line_size elements.
 */
static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
                                          idctin *col)
{
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;
    int out[8];
    int y;

    IDCT_COLS;

    /* outputs 0..3 use even + odd, outputs 7..4 the mirrored even - odd */
    out[0] = (int)(a0 + b0) >> COL_SHIFT;
    out[1] = (int)(a1 + b1) >> COL_SHIFT;
    out[2] = (int)(a2 + b2) >> COL_SHIFT;
    out[3] = (int)(a3 + b3) >> COL_SHIFT;
    out[4] = (int)(a3 - b3) >> COL_SHIFT;
    out[5] = (int)(a2 - b2) >> COL_SHIFT;
    out[6] = (int)(a1 - b1) >> COL_SHIFT;
    out[7] = (int)(a0 - b0) >> COL_SHIFT;

    for (y = 0; y < 8; y++) {
        dest[y * line_size] = av_clip_pixel(out[y]);
    }
}
/**
 * Column pass with accumulate: runs IDCT_COLS on the column starting at col
 * and adds the eight results to the samples already in dest (one every
 * line_size elements), clipping each sum.
 *
 * The accumulators use SUINT and each sum is cast back to int only for the
 * final shift, exactly as idctSparseColPut() and the row pass do. Plain int
 * accumulators would make the intermediate additions signed-overflow
 * undefined behaviour for large coefficients.
 */
static inline void FUNC6(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
                                          idctin *col)
{
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;

    IDCT_COLS;

    /* outputs 0..3 use even + odd, outputs 4..7 the mirrored even - odd */
    dest[0] = av_clip_pixel(dest[0] + ((int)(a0 + b0) >> COL_SHIFT));
    dest += line_size;
    dest[0] = av_clip_pixel(dest[0] + ((int)(a1 + b1) >> COL_SHIFT));
    dest += line_size;
    dest[0] = av_clip_pixel(dest[0] + ((int)(a2 + b2) >> COL_SHIFT));
    dest += line_size;
    dest[0] = av_clip_pixel(dest[0] + ((int)(a3 + b3) >> COL_SHIFT));
    dest += line_size;
    dest[0] = av_clip_pixel(dest[0] + ((int)(a3 - b3) >> COL_SHIFT));
    dest += line_size;
    dest[0] = av_clip_pixel(dest[0] + ((int)(a2 - b2) >> COL_SHIFT));
    dest += line_size;
    dest[0] = av_clip_pixel(dest[0] + ((int)(a1 - b1) >> COL_SHIFT));
    dest += line_size;
    dest[0] = av_clip_pixel(dest[0] + ((int)(a0 - b0) >> COL_SHIFT));
}
/*
 * In-place column pass: runs IDCT_COLS on the column starting at col and
 * writes the eight results back to col[0], col[8], ..., col[56].
 * The body below is shared with the _extrashift variant selected when
 * EXTRA_SHIFT is defined.
 *
 * The accumulators use SUINT and each sum is cast back to int only for the
 * final shift, as the row pass and idctSparseColPut() do, so the
 * intermediate additions cannot overflow a signed int.
 */
static inline void FUNC6(idctSparseCol)(idctin *col)
#endif
{
    SUINT a0, a1, a2, a3, b0, b1, b2, b3;

    IDCT_COLS;

    col[0 ] = ((int)(a0 + b0) >> COL_SHIFT);
    col[8 ] = ((int)(a1 + b1) >> COL_SHIFT);
    col[16] = ((int)(a2 + b2) >> COL_SHIFT);
    col[24] = ((int)(a3 + b3) >> COL_SHIFT);
    col[32] = ((int)(a3 - b3) >> COL_SHIFT);
    col[40] = ((int)(a2 - b2) >> COL_SHIFT);
    col[48] = ((int)(a1 - b1) >> COL_SHIFT);
    col[56] = ((int)(a0 - b0) >> COL_SHIFT);
}
#ifndef EXTRA_SHIFT
/**
 * 8x8 IDCT with store: row pass over all eight rows of block_, then a column
 * pass that writes the clipped samples to dest_. line_size is given in bytes
 * and converted to pixel units here.
 */
void FUNC6(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block_)
{
    idctin *const coeffs = (idctin *)block_;
    pixel *const out = (pixel *)dest_;
    const ptrdiff_t stride = line_size / sizeof(pixel);
    int y, x;

    /* every row must be transformed before any column is processed */
    for (y = 0; y < 8; y++) {
        FUNC6(idctRowCondDC)(coeffs + y * 8, 0);
    }

    for (x = 0; x < 8; x++) {
        FUNC6(idctSparseColPut)(out + x, stride, coeffs + x);
    }
}
#if IN_IDCT_DEPTH == 16
/**
 * 8x8 IDCT with accumulate: row pass over all eight rows of block, then a
 * column pass that adds the result to the samples in dest_. line_size is
 * given in bytes and converted to pixel units here.
 */
void FUNC6(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block)
{
    pixel *const out = (pixel *)dest_;
    const ptrdiff_t stride = line_size / sizeof(pixel);
    int y, x;

    /* every row must be transformed before any column is processed */
    for (y = 0; y < 8; y++) {
        FUNC6(idctRowCondDC)(block + y * 8, 0);
    }

    for (x = 0; x < 8; x++) {
        FUNC6(idctSparseColAdd)(out + x, stride, block + x);
    }
}
/**
 * In-place 8x8 IDCT: row pass over all eight rows of block followed by the
 * in-place column pass over all eight columns.
 */
void FUNC6(ff_simple_idct)(int16_t *block)
{
    int y, x;

    /* every row must be transformed before any column is processed */
    for (y = 0; y < 8; y++) {
        FUNC6(idctRowCondDC)(block + y * 8, 0);
    }

    for (x = 0; x < 8; x++) {
        FUNC6(idctSparseCol)(block + x);
    }
}
#endif
#endif

View File

@ -0,0 +1,491 @@
;******************************************************************************
;* 32 point SSE-optimized DCT transform
;* Copyright (c) 2010 Vitor Sessak
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
SECTION_RODATA 32
; Sign-bit mask: xor-ing a float vector with it flips the sign of elements
; 2, 3, 6 and 7 and leaves elements 0, 1, 4 and 5 unchanged.
ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
; Multiplier table of the butterflies. The code addresses it by byte offset
; (ps_cos_vec+N); every `dd` row below holds four floats, i.e. 16 bytes.
ps_cos_vec: dd 0.500603, 0.505471, 0.515447, 0.531043
dd 0.553104, 0.582935, 0.622504, 0.674808
dd -10.190008, -3.407609, -2.057781, -1.484165
dd -1.169440, -0.972568, -0.839350, -0.744536
dd 0.502419, 0.522499, 0.566944, 0.646822
dd 0.788155, 1.060678, 1.722447, 5.101149
dd 0.509796, 0.601345, 0.899976, 2.562916
dd 0.509796, 0.601345, 0.899976, 2.562916
dd 1.000000, 1.000000, 1.306563, 0.541196
dd 1.000000, 1.000000, 1.306563, 0.541196
dd 1.000000, 0.707107, 1.000000, -0.707107
dd 1.000000, 0.707107, 1.000000, -0.707107
dd 0.707107, 0.707107, 0.707107, 0.707107
; BUTTERFLY a, b, coef, tmp
; Written in three-operand form (dst, src1, src2). On exit:
;   tmp = a - b
;   b   = a + b
;   a   = (a - b) * coef
%macro BUTTERFLY 4
subps %4, %1, %2
addps %2, %2, %1
mulps %1, %4, %3
%endmacro
; BUTTERFLY0 a, signmask, coef, tmp, shuffle_imm
; In-register butterfly: a shuffled copy of a is added to a xor signmask and
; the sum is multiplied by coef; the result is left in a, tmp is clobbered.
; The SSE2 (non-AVX) path uses pshufd and two-operand instructions, the other
; path uses shufps and three-operand forms.
%macro BUTTERFLY0 5
%if cpuflag(sse2) && notcpuflag(avx)
pshufd %4, %1, %5
xorps %1, %2
addps %1, %4
mulps %1, %3
%else
shufps %4, %1, %1, %5
xorps %1, %1, %2
addps %4, %4, %1
mulps %1, %4, %3
%endif
%endmacro
; BUTTERFLY2 a, signmask, coef, tmp
; BUTTERFLY0 with shuffle immediate 0x1b (element order 3,2,1,0: the four
; dwords are reversed).
%macro BUTTERFLY2 4
BUTTERFLY0 %1, %2, %3, %4, 0x1b
%endmacro
; BUTTERFLY3 a, signmask, coef, tmp
; BUTTERFLY0 with shuffle immediate 0xb1 (element order 1,0,3,2: adjacent
; dwords are swapped).
%macro BUTTERFLY3 4
BUTTERFLY0 %1, %2, %3, %4, 0xb1
%endmacro
; BUTTERFLY3V r1, r2, r3, r4, tmp   (arguments are register numbers)
; Two whole-register butterflies. [ps_cos_vec+192] is the last row of the
; table (four times 0.707107). On exit:
;   m<r1> = r1 + r2
;   m<r2> = (r1 - r2) * 0.707107   (computed in tmp, then renamed with SWAP)
;   m<r3> = r3 + r4
;   m<r4> = (r4 - r3) * 0.707107
%macro BUTTERFLY3V 5
movaps m%5, m%1
addps m%1, m%2
subps m%5, m%2
SWAP %2, %5
mulps m%2, [ps_cos_vec+192]
movaps m%5, m%3
addps m%3, m%4
subps m%4, m%5
mulps m%4, [ps_cos_vec+192]
%endmacro
; PASS6_AND_PERMUTE
; Final pass done with scalar movss/addss directly on the buffer at outq,
; combined with moving the values to their final positions. tmpd carries raw
; dwords that are only relocated, not added.
; m0, m1, m4, m5 and m6 are read before this macro writes them, so the caller
; must leave live values in those registers; all other operands are loaded
; from outq. The interleaving of loads and stores is order-sensitive because
; slots of outq are overwritten while later instructions still read others.
%macro PASS6_AND_PERMUTE 0
mov tmpd, [outq+4]
movss m7, [outq+72]
addss m7, [outq+76]
movss m3, [outq+56]
addss m3, [outq+60]
addss m4, m3
movss m2, [outq+52]
addss m2, m3
movss m3, [outq+104]
addss m3, [outq+108]
addss m1, m3
addss m5, m4
movss [outq+ 16], m1
movss m1, [outq+100]
addss m1, m3
movss m3, [outq+40]
movss [outq+ 48], m1
addss m3, [outq+44]
movss m1, [outq+100]
addss m4, m3
addss m3, m2
addss m1, [outq+108]
movss [outq+ 40], m3
addss m2, [outq+36]
movss m3, [outq+8]
movss [outq+ 56], m2
addss m3, [outq+12]
movss [outq+ 32], m3
movss m3, [outq+80]
movss [outq+ 8], m5
movss [outq+ 80], m1
movss m2, [outq+52]
movss m5, [outq+120]
addss m5, [outq+124]
movss m1, [outq+64]
addss m2, [outq+60]
addss m0, m5
addss m5, [outq+116]
mov [outq+64], tmpd
addss m6, m0
addss m1, m6
mov tmpd, [outq+12]
mov [outq+ 96], tmpd
movss [outq+ 4], m1
movss m1, [outq+24]
movss [outq+ 24], m4
movss m4, [outq+88]
addss m4, [outq+92]
addss m3, m4
addss m4, [outq+84]
mov tmpd, [outq+108]
addss m1, [outq+28]
addss m0, m1
addss m1, m5
addss m6, m3
addss m3, m0
addss m0, m7
addss m5, [outq+20]
addss m7, m1
movss [outq+ 12], m6
mov [outq+112], tmpd
movss m6, [outq+28]
movss [outq+ 28], m0
movss m0, [outq+36]
movss [outq+ 36], m7
addss m1, m4
movss m7, [outq+116]
addss m0, m2
addss m7, [outq+124]
movss [outq+ 72], m0
movss m0, [outq+44]
addss m2, m0
movss [outq+ 44], m1
movss [outq+ 88], m2
addss m0, [outq+60]
mov tmpd, [outq+60]
mov [outq+120], tmpd
movss [outq+104], m0
addss m4, m5
addss m5, [outq+68]
movss [outq+52], m4
movss [outq+60], m5
movss m4, [outq+68]
movss m5, [outq+20]
movss [outq+ 20], m3
addss m5, m7
addss m7, m6
addss m4, m5
movss m2, [outq+84]
addss m2, [outq+92]
addss m5, m2
movss [outq+ 68], m4
addss m2, m7
movss m4, [outq+76]
movss [outq+ 84], m2
movss [outq+ 76], m5
addss m7, m4
addss m6, [outq+124]
addss m4, m6
addss m6, [outq+92]
movss [outq+100], m4
movss [outq+108], m6
movss m6, [outq+92]
movss [outq+92], m7
addss m6, [outq+124]
movss [outq+116], m6
%endmacro
; AVX version, only assembled when HAVE_AVX_EXTERNAL is set. Passes 1 to 5
; work on 256-bit registers; the results are then stored to outq and the
; last pass runs as scalar code (PASS6_AND_PERMUTE) after switching the
; register aliases back to XMM.
INIT_YMM avx
SECTION .text
%if HAVE_AVX_EXTERNAL
; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in)
cglobal dct32_float, 2,3,8, out, in, tmp
; pass 1
vmovaps m4, [inq+0]
; build the second operand from two 128-bit halves, then reverse the dwords
; within each half (0x1b)
vinsertf128 m5, m5, [inq+96], 1
vinsertf128 m5, m5, [inq+112], 0
vshufps m5, m5, m5, 0x1b
BUTTERFLY m4, m5, [ps_cos_vec], m6
vmovaps m2, [inq+64]
vinsertf128 m6, m6, [inq+32], 1
vinsertf128 m6, m6, [inq+48], 0
vshufps m6, m6, m6, 0x1b
BUTTERFLY m2, m6, [ps_cos_vec+32], m0
; pass 2
BUTTERFLY m5, m6, [ps_cos_vec+64], m0
BUTTERFLY m4, m2, [ps_cos_vec+64], m7
; pass 3
vperm2f128 m3, m6, m4, 0x31
vperm2f128 m1, m6, m4, 0x20
vshufps m3, m3, m3, 0x1b
BUTTERFLY m1, m3, [ps_cos_vec+96], m6
vperm2f128 m4, m5, m2, 0x20
vperm2f128 m5, m5, m2, 0x31
vshufps m5, m5, m5, 0x1b
BUTTERFLY m4, m5, [ps_cos_vec+96], m6
; pass 4
vmovaps m6, [ps_p1p1m1m1+0]
vmovaps m2, [ps_cos_vec+128]
BUTTERFLY2 m5, m6, m2, m7
BUTTERFLY2 m4, m6, m2, m7
BUTTERFLY2 m1, m6, m2, m7
BUTTERFLY2 m3, m6, m2, m7
; pass 5
; rearrange the sign mask for the adjacent-pair butterflies
vshufps m6, m6, m6, 0xcc
vmovaps m2, [ps_cos_vec+160]
BUTTERFLY3 m5, m6, m2, m7
BUTTERFLY3 m4, m6, m2, m7
BUTTERFLY3 m1, m6, m2, m7
BUTTERFLY3 m3, m6, m2, m7
; store the pass-5 results to outq; m6 and m0 receive the upper halves of m3
; and m1, which the scalar pass reads from registers
vperm2f128 m6, m3, m3, 0x31
vmovaps [outq], m3
vextractf128 [outq+64], m5, 1
vextractf128 [outq+32], m5, 0
vextractf128 [outq+80], m4, 1
vextractf128 [outq+48], m4, 0
vperm2f128 m0, m1, m1, 0x31
vmovaps [outq+96], m1
vzeroupper
; pass 6, no SIMD...
INIT_XMM
PASS6_AND_PERMUTE
RET
%endif
; x86-64 build: SPILL/UNSPILL are plain register renames (SWAP), so nothing
; is written to memory between the passes.
%if ARCH_X86_64
%define SPILL SWAP
%define UNSPILL SWAP
; PASS5 (x86-64): renames the pass-4 results into m8..m15, transposes them in
; two 4x4 groups and applies BUTTERFLY3V plus the follow-up additions on
; whole registers.
%macro PASS5 0
nop ; FIXME code alignment
SWAP 5, 8
SWAP 4, 12
SWAP 6, 14
SWAP 7, 13
SWAP 0, 15
PERMUTE 9,10, 10,12, 11,14, 12,9, 13,11, 14,13
TRANSPOSE4x4PS 8, 9, 10, 11, 0
BUTTERFLY3V 8, 9, 10, 11, 0
addps m10, m11
TRANSPOSE4x4PS 12, 13, 14, 15, 0
BUTTERFLY3V 12, 13, 14, 15, 0
addps m14, m15
addps m12, m14
addps m14, m13
addps m13, m15
%endmacro
; PASS6 (x86-64): final additions and stores to outq.
; The first part stores one scalar from each of m8..m14 (and all of m15) at
; offsets 0x00, 0x10, ..., 0x70 and, via pshuflw 0xe, copies another element
; of each into m0..m7, whose neighbouring sums go to offsets 0x08..0x78.
; The %rep blocks then extract the remaining elements and store the sums of
; neighbouring registers at offsets 4, 12, 20, ... (i starts at 4 and grows
; by 8), rotating the register names with SWAP after every step.
%macro PASS6 0
SWAP 9, 12
SWAP 11, 14
movss [outq+0x00], m8
pshuflw m0, m8, 0xe
movss [outq+0x10], m9
pshuflw m1, m9, 0xe
movss [outq+0x20], m10
pshuflw m2, m10, 0xe
movss [outq+0x30], m11
pshuflw m3, m11, 0xe
movss [outq+0x40], m12
pshuflw m4, m12, 0xe
movss [outq+0x50], m13
pshuflw m5, m13, 0xe
movss [outq+0x60], m14
pshuflw m6, m14, 0xe
movaps [outq+0x70], m15
pshuflw m7, m15, 0xe
addss m0, m1
addss m1, m2
movss [outq+0x08], m0
addss m2, m3
movss [outq+0x18], m1
addss m3, m4
movss [outq+0x28], m2
addss m4, m5
movss [outq+0x38], m3
addss m5, m6
movss [outq+0x48], m4
addss m6, m7
movss [outq+0x58], m5
movss [outq+0x68], m6
movss [outq+0x78], m7
PERMUTE 1,8, 3,9, 5,10, 7,11, 9,12, 11,13, 13,14, 8,1, 10,3, 12,5, 14,7
movhlps m0, m1
pshufd m1, m1, 3
SWAP 0, 2, 4, 6, 8, 10, 12, 14
SWAP 1, 3, 5, 7, 9, 11, 13, 15
%rep 7
movhlps m0, m1
pshufd m1, m1, 3
addss m15, m1
SWAP 0, 2, 4, 6, 8, 10, 12, 14
SWAP 1, 3, 5, 7, 9, 11, 13, 15
%endrep
%assign i 4
%rep 15
addss m0, m1
movss [outq+i], m0
SWAP 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
%assign i i+8
%endrep
%endmacro
%else ; ARCH_X86_32
; x86-32 build: SPILL/UNSPILL really move data, using the output buffer as
; scratch space. Slot numbers 8..15 map to byte offsets 0..112 of outq.
%macro SPILL 2 ; xmm#, mempos
movaps [outq+(%2-8)*16], m%1
%endmacro
%macro UNSPILL 2 ; xmm#, mempos
movaps m%1, [outq+(%2-8)*16]
%endmacro
; On x86-32 the last pass is the scalar PASS6_AND_PERMUTE.
%define PASS6 PASS6_AND_PERMUTE
; PASS5 (x86-32): applies BUTTERFLY3 to each of the eight pass-4 results in
; turn. m3 holds the sign mask (rearranged with shufps 0xcc), m2 the
; multipliers from ps_cos_vec+160. Values that do not fit in registers are
; fetched with UNSPILL, and every result is written to its slot with SPILL.
%macro PASS5 0
movaps m2, [ps_cos_vec+160]
shufps m3, m3, 0xcc
BUTTERFLY3 m5, m3, m2, m1
SPILL 5, 8
UNSPILL 1, 9
BUTTERFLY3 m1, m3, m2, m5
SPILL 1, 14
BUTTERFLY3 m4, m3, m2, m5
SPILL 4, 12
BUTTERFLY3 m7, m3, m2, m5
SPILL 7, 13
UNSPILL 5, 10
BUTTERFLY3 m5, m3, m2, m7
SPILL 5, 10
UNSPILL 4, 11
BUTTERFLY3 m4, m3, m2, m7
SPILL 4, 11
BUTTERFLY3 m6, m3, m2, m7
SPILL 6, 9
BUTTERFLY3 m0, m3, m2, m7
SPILL 0, 15
%endmacro
%endif
; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
; DCT32_FUNC emits one dct32_float function for the instruction set selected
; by the preceding INIT_XMM. Passes 1 to 4 are written inline; passes 1 and 2
; are interleaved for the two halves of the input. Passes 5 and 6 come from
; the PASS5/PASS6 macros chosen for the target architecture. SPILL/UNSPILL
; park intermediate vectors in numbered slots.
%macro DCT32_FUNC 0
cglobal dct32_float, 2, 3, 16, out, in, tmp
; pass 1
movaps m0, [inq+0]
LOAD_INV m1, [inq+112]
BUTTERFLY m0, m1, [ps_cos_vec], m3
movaps m7, [inq+64]
LOAD_INV m4, [inq+48]
BUTTERFLY m7, m4, [ps_cos_vec+32], m3
; pass 2
movaps m2, [ps_cos_vec+64]
BUTTERFLY m1, m4, m2, m3
SPILL 1, 11
SPILL 4, 8
; pass 1
movaps m1, [inq+16]
LOAD_INV m6, [inq+96]
BUTTERFLY m1, m6, [ps_cos_vec+16], m3
movaps m4, [inq+80]
LOAD_INV m5, [inq+32]
BUTTERFLY m4, m5, [ps_cos_vec+48], m3
; pass 2
BUTTERFLY m0, m7, m2, m3
movaps m2, [ps_cos_vec+80]
BUTTERFLY m6, m5, m2, m3
BUTTERFLY m1, m4, m2, m3
; pass 3
movaps m2, [ps_cos_vec+96]
shufps m1, m1, 0x1b
BUTTERFLY m0, m1, m2, m3
SPILL 0, 15
SPILL 1, 14
UNSPILL 0, 8
shufps m5, m5, 0x1b
BUTTERFLY m0, m5, m2, m3
UNSPILL 1, 11
shufps m6, m6, 0x1b
BUTTERFLY m1, m6, m2, m3
SPILL 1, 11
shufps m4, m4, 0x1b
BUTTERFLY m7, m4, m2, m3
; pass 4
movaps m3, [ps_p1p1m1m1+0]
movaps m2, [ps_cos_vec+128]
BUTTERFLY2 m5, m3, m2, m1
BUTTERFLY2 m0, m3, m2, m1
SPILL 0, 9
BUTTERFLY2 m6, m3, m2, m1
SPILL 6, 10
UNSPILL 0, 11
BUTTERFLY2 m0, m3, m2, m1
SPILL 0, 11
BUTTERFLY2 m4, m3, m2, m1
BUTTERFLY2 m7, m3, m2, m1
UNSPILL 6, 14
BUTTERFLY2 m6, m3, m2, m1
UNSPILL 0, 15
BUTTERFLY2 m0, m3, m2, m1
PASS5
PASS6
RET
%endmacro
; LOAD_INV dst, src
; Loads src into dst with its four dwords in reverse order (shuffle immediate
; 0x1b). SSE2 does it with a single pshufd; plain SSE needs a movaps followed
; by shufps.
%macro LOAD_INV 2
%if cpuflag(sse2)
pshufd %1, %2, 0x1b
%elif cpuflag(sse)
movaps %1, %2
shufps %1, %1, 0x1b
%endif
%endmacro
; Instantiate the function: the plain-SSE version is only assembled for
; 32-bit x86, the SSE2 version for both architectures.
%if ARCH_X86_32
INIT_XMM sse
DCT32_FUNC
%endif
INIT_XMM sse2
DCT32_FUNC

View File

@ -0,0 +1,41 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/dct.h"
/* 32-point DCT implementations provided by the x86 assembly code. */
void ff_dct32_float_sse(FFTSample *out, const FFTSample *in);
void ff_dct32_float_sse2(FFTSample *out, const FFTSample *in);
void ff_dct32_float_avx(FFTSample *out, const FFTSample *in);

/**
 * Install the fastest available x86 dct32 implementation into s.
 *
 * The checks run from the oldest to the newest instruction set, so a later
 * match overrides an earlier one. s->dct32 is left untouched when none of
 * them matches.
 */
av_cold void ff_dct_init_x86(DCTContext *s)
{
    const int flags = av_get_cpu_flags();

#if ARCH_X86_32
    /* the plain-SSE version is only considered on 32-bit x86 */
    if (EXTERNAL_SSE(flags)) {
        s->dct32 = ff_dct32_float_sse;
    }
#endif
    if (EXTERNAL_SSE2(flags)) {
        s->dct32 = ff_dct32_float_sse2;
    }
    if (EXTERNAL_AVX_FAST(flags)) {
        s->dct32 = ff_dct32_float_avx;
    }
}

View File

@ -0,0 +1,594 @@
/*
* SIMD-optimized forward DCT
* The gcc porting is Copyright (c) 2001 Fabrice Bellard.
* cleanup/optimizations are Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
* SSE2 optimization is Copyright (c) 2004 Denes Balatoni.
*
* from fdctam32.c - AP922 MMX(3D-Now) forward-DCT
*
* Intel Application Note AP-922 - fast, precise implementation of DCT
* http://developer.intel.com/vtune/cbts/appnotes.htm
*
* Also of inspiration:
* a page about fdct at http://www.geocities.com/ssavekar/dct.htm
* Skal's fdct at http://skal.planet-d.net/coding/dct.html
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/common.h"
#include "libavutil/x86/asm.h"
#include "fdct.h"
#if HAVE_MMX_INLINE
//////////////////////////////////////////////////////////////////////
//
// constants for the forward DCT
// -----------------------------
//
// Be sure to check that your compiler is aligning all constants to QWORD
// (8-byte) memory boundaries! Otherwise the unaligned memory access will
// severely stall MMX execution.
//
//////////////////////////////////////////////////////////////////////
#define BITS_FRW_ACC 3 //; 2 or 3 for accuracy
// left shift applied to the inputs of the column pass
#define SHIFT_FRW_COL BITS_FRW_ACC
// shift of the row pass: 3 + 17 - 3 = 17 with the setting above
#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3)
// rounding constant for the row pass: half of 1 << SHIFT_FRW_ROW
#define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1))
//#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1))
// repeats its argument eight times, to fill one vector of eight 16-bit lanes
#define X8(x) x,x,x,x,x,x,x,x
//concatenated table, for forward DCT transformation
// Three vectors of eight identical 16-bit values each.
DECLARE_ALIGNED(16, static const int16_t, fdct_tg_all_16)[24] = {
X8(13036), // tan(1*pi/16) * (1<<16) + 0.5
X8(27146), // tan(2*pi/16) * (1<<16) + 0.5
X8(-21746) // tan(3*pi/16) * (1<<16) + 0.5 = 43790, stored wrapped to int16_t (43790 - 65536)
};
DECLARE_ALIGNED(16, static const int16_t, ocos_4_16)[8] = {
X8(23170) //cos(pi/4) * (1<<15) + 0.5
};
// vector of ones; the column code ORs it into some results
DECLARE_ALIGNED(16, static const int16_t, fdct_one_corr)[8] = { X8(1) };
// row-pass rounding constant replicated for two 32-bit lanes
DECLARE_ALIGNED(8, static const int32_t, fdct_r_row)[2] = {RND_FRW_ROW, RND_FRW_ROW };
// Row-pass rounding constant replicated for four 32-bit lanes. It is wrapped
// in a struct so that the 16-byte aligned array is a struct member; the
// commented-out line below shows the plain-array form it replaces.
static const struct
{
DECLARE_ALIGNED(16, const int32_t, fdct_r_row_sse2)[4];
} fdct_r_row_sse2 =
{{
RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW
}};
//DECLARE_ALIGNED(16, static const long, fdct_r_row_sse2)[4] = {RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW};
// Coefficient table of the forward DCT row pass: 256 values laid out as
// eight groups of 32 (eight lines of four values per group). The groups
// start with 16384, 22725, 21407, 19266, 16384, 19266, 21407 and 22725, the
// same sequence of C4 values used for the eight groups of
// tab_frw_01234567_sse2, in a different element order.
DECLARE_ALIGNED(8, static const int16_t, tab_frw_01234567)[] = { // forward_dct coeff table
16384, 16384, 22725, 19266,
16384, 16384, 12873, 4520,
21407, 8867, 19266, -4520,
-8867, -21407, -22725, -12873,
16384, -16384, 12873, -22725,
-16384, 16384, 4520, 19266,
8867, -21407, 4520, -12873,
21407, -8867, 19266, -22725,
22725, 22725, 31521, 26722,
22725, 22725, 17855, 6270,
29692, 12299, 26722, -6270,
-12299, -29692, -31521, -17855,
22725, -22725, 17855, -31521,
-22725, 22725, 6270, 26722,
12299, -29692, 6270, -17855,
29692, -12299, 26722, -31521,
21407, 21407, 29692, 25172,
21407, 21407, 16819, 5906,
27969, 11585, 25172, -5906,
-11585, -27969, -29692, -16819,
21407, -21407, 16819, -29692,
-21407, 21407, 5906, 25172,
11585, -27969, 5906, -16819,
27969, -11585, 25172, -29692,
19266, 19266, 26722, 22654,
19266, 19266, 15137, 5315,
25172, 10426, 22654, -5315,
-10426, -25172, -26722, -15137,
19266, -19266, 15137, -26722,
-19266, 19266, 5315, 22654,
10426, -25172, 5315, -15137,
25172, -10426, 22654, -26722,
16384, 16384, 22725, 19266,
16384, 16384, 12873, 4520,
21407, 8867, 19266, -4520,
-8867, -21407, -22725, -12873,
16384, -16384, 12873, -22725,
-16384, 16384, 4520, 19266,
8867, -21407, 4520, -12873,
21407, -8867, 19266, -22725,
19266, 19266, 26722, 22654,
19266, 19266, 15137, 5315,
25172, 10426, 22654, -5315,
-10426, -25172, -26722, -15137,
19266, -19266, 15137, -26722,
-19266, 19266, 5315, 22654,
10426, -25172, 5315, -15137,
25172, -10426, 22654, -26722,
21407, 21407, 29692, 25172,
21407, 21407, 16819, 5906,
27969, 11585, 25172, -5906,
-11585, -27969, -29692, -16819,
21407, -21407, 16819, -29692,
-21407, 21407, 5906, 25172,
11585, -27969, 5906, -16819,
27969, -11585, 25172, -29692,
22725, 22725, 31521, 26722,
22725, 22725, 17855, 6270,
29692, 12299, 26722, -6270,
-12299, -29692, -31521, -17855,
22725, -22725, 17855, -31521,
-22725, 22725, 6270, 26722,
12299, -29692, 6270, -17855,
29692, -12299, 26722, -31521,
};
// SSE2 layout of the forward DCT row coefficients: 256 values built from
// eight expansions of TABLE_SSE2 (32 values each). Before every expansion
// C1..C7 are redefined with the coefficient set of that group; the sets
// follow the sequence A, B, C, D, A, D, C, B when named by first appearance.
// The array is wrapped in a struct so that the 16-byte aligned array is a
// struct member.
static const struct
{
DECLARE_ALIGNED(16, const int16_t, tab_frw_01234567_sse2)[256];
} tab_frw_01234567_sse2 =
{{
//DECLARE_ALIGNED(16, static const int16_t, tab_frw_01234567_sse2)[] = { // forward_dct coeff table
// element order of one 32-value group, expressed with the current C1..C7
#define TABLE_SSE2 C4, C4, C1, C3, -C6, -C2, -C1, -C5, \
C4, C4, C5, C7, C2, C6, C3, -C7, \
-C4, C4, C7, C3, C6, -C2, C7, -C5, \
C4, -C4, C5, -C1, C2, -C6, C3, -C1,
// c1..c7 * cos(pi/4) * 2^15
#define C1 22725
#define C2 21407
#define C3 19266
#define C4 16384
#define C5 12873
#define C6 8867
#define C7 4520
TABLE_SSE2
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#define C1 31521
#define C2 29692
#define C3 26722
#define C4 22725
#define C5 17855
#define C6 12299
#define C7 6270
TABLE_SSE2
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#define C1 29692
#define C2 27969
#define C3 25172
#define C4 21407
#define C5 16819
#define C6 11585
#define C7 5906
TABLE_SSE2
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#define C1 26722
#define C2 25172
#define C3 22654
#define C4 19266
#define C5 15137
#define C6 10426
#define C7 5315
TABLE_SSE2
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#define C1 22725
#define C2 21407
#define C3 19266
#define C4 16384
#define C5 12873
#define C6 8867
#define C7 4520
TABLE_SSE2
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#define C1 26722
#define C2 25172
#define C3 22654
#define C4 19266
#define C5 15137
#define C6 10426
#define C7 5315
TABLE_SSE2
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#define C1 29692
#define C2 27969
#define C3 25172
#define C4 21407
#define C5 16819
#define C6 11585
#define C7 5906
TABLE_SSE2
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#define C1 31521
#define C2 29692
#define C3 26722
#define C4 22725
#define C5 17855
#define C6 12299
#define C7 6270
TABLE_SSE2
}};
/* Stringify the expansion of a macro so that numeric constants such as
 * SHIFT_FRW_COL can be spliced into the inline-assembly text below. */
#define S(s) AV_TOSTRING(s) //AV_STRINGIFY is too long
/*
 * FDCT_COL(cpu, mm, mov) defines
 *
 *     static void fdct_col_<cpu>(const int16_t *in, int16_t *out, int offset)
 *
 * which is the first of the two passes run by the ff_fdct_* entry points.
 *
 *   cpu  suffix of the generated function name
 *   mm   register family prefix ("mm" or "xmm"); registers 0..7 are used
 *   mov  full-register move instruction for that family
 *
 * The whole body is one asm statement.  It reads eight vectors from
 * (in + offset) at byte offsets 0, 16, ..., 112 (one per row of eight
 * int16_t) and stores eight result vectors to (out + offset) at the same
 * byte offsets.  Operands: %0 = in + offset, %1 = fdct_tg_all_16,
 * %2 = fdct_one_corr, %3 = out + offset, %4 = ocos_4_16.
 *
 * With 8-byte "mm" registers one call covers four int16_t columns, with
 * 16-byte "xmm" registers it covers eight; this matches how the callers
 * invoke it (offsets 0 and 4 for MMX, offset 0 only for SSE2).
 *
 * NOTE(review): the asm statement declares no outputs and no clobbers
 * (neither the vector registers nor "memory") although it stores through
 * %3 -- confirm this is acceptable for every caller.
 */
#define FDCT_COL(cpu, mm, mov)\
static av_always_inline void fdct_col_##cpu(const int16_t *in, int16_t *out, int offset)\
{\
__asm__ volatile (\
#mov" 16(%0), %%"#mm"0 \n\t" \
#mov" 96(%0), %%"#mm"1 \n\t" \
#mov" %%"#mm"0, %%"#mm"2 \n\t" \
#mov" 32(%0), %%"#mm"3 \n\t" \
"paddsw %%"#mm"1, %%"#mm"0 \n\t" \
#mov" 80(%0), %%"#mm"4 \n\t" \
"psllw $"S(SHIFT_FRW_COL)", %%"#mm"0 \n\t" \
#mov" (%0), %%"#mm"5 \n\t" \
"paddsw %%"#mm"3, %%"#mm"4 \n\t" \
"paddsw 112(%0), %%"#mm"5 \n\t" \
"psllw $"S(SHIFT_FRW_COL)", %%"#mm"4 \n\t" \
#mov" %%"#mm"0, %%"#mm"6 \n\t" \
"psubsw %%"#mm"1, %%"#mm"2 \n\t" \
#mov" 16(%1), %%"#mm"1 \n\t" \
"psubsw %%"#mm"4, %%"#mm"0 \n\t" \
#mov" 48(%0), %%"#mm"7 \n\t" \
"pmulhw %%"#mm"0, %%"#mm"1 \n\t" \
"paddsw 64(%0), %%"#mm"7 \n\t" \
"psllw $"S(SHIFT_FRW_COL)", %%"#mm"5 \n\t" \
"paddsw %%"#mm"4, %%"#mm"6 \n\t" \
"psllw $"S(SHIFT_FRW_COL)", %%"#mm"7 \n\t" \
#mov" %%"#mm"5, %%"#mm"4 \n\t" \
"psubsw %%"#mm"7, %%"#mm"5 \n\t" \
"paddsw %%"#mm"5, %%"#mm"1 \n\t" \
"paddsw %%"#mm"7, %%"#mm"4 \n\t" \
"por (%2), %%"#mm"1 \n\t" \
"psllw $"S(SHIFT_FRW_COL)"+1, %%"#mm"2 \n\t" \
"pmulhw 16(%1), %%"#mm"5 \n\t" \
#mov" %%"#mm"4, %%"#mm"7 \n\t" \
"psubsw 80(%0), %%"#mm"3 \n\t" \
"psubsw %%"#mm"6, %%"#mm"4 \n\t" \
#mov" %%"#mm"1, 32(%3) \n\t" \
"paddsw %%"#mm"6, %%"#mm"7 \n\t" \
#mov" 48(%0), %%"#mm"1 \n\t" \
"psllw $"S(SHIFT_FRW_COL)"+1, %%"#mm"3 \n\t" \
"psubsw 64(%0), %%"#mm"1 \n\t" \
#mov" %%"#mm"2, %%"#mm"6 \n\t" \
#mov" %%"#mm"4, 64(%3) \n\t" \
"paddsw %%"#mm"3, %%"#mm"2 \n\t" \
"pmulhw (%4), %%"#mm"2 \n\t" \
"psubsw %%"#mm"3, %%"#mm"6 \n\t" \
"pmulhw (%4), %%"#mm"6 \n\t" \
"psubsw %%"#mm"0, %%"#mm"5 \n\t" \
"por (%2), %%"#mm"5 \n\t" \
"psllw $"S(SHIFT_FRW_COL)", %%"#mm"1 \n\t" \
"por (%2), %%"#mm"2 \n\t" \
#mov" %%"#mm"1, %%"#mm"4 \n\t" \
#mov" (%0), %%"#mm"3 \n\t" \
"paddsw %%"#mm"6, %%"#mm"1 \n\t" \
"psubsw 112(%0), %%"#mm"3 \n\t" \
"psubsw %%"#mm"6, %%"#mm"4 \n\t" \
#mov" (%1), %%"#mm"0 \n\t" \
"psllw $"S(SHIFT_FRW_COL)", %%"#mm"3 \n\t" \
#mov" 32(%1), %%"#mm"6 \n\t" \
"pmulhw %%"#mm"1, %%"#mm"0 \n\t" \
#mov" %%"#mm"7, (%3) \n\t" \
"pmulhw %%"#mm"4, %%"#mm"6 \n\t" \
#mov" %%"#mm"5, 96(%3) \n\t" \
#mov" %%"#mm"3, %%"#mm"7 \n\t" \
#mov" 32(%1), %%"#mm"5 \n\t" \
"psubsw %%"#mm"2, %%"#mm"7 \n\t" \
"paddsw %%"#mm"2, %%"#mm"3 \n\t" \
"pmulhw %%"#mm"7, %%"#mm"5 \n\t" \
"paddsw %%"#mm"3, %%"#mm"0 \n\t" \
"paddsw %%"#mm"4, %%"#mm"6 \n\t" \
"pmulhw (%1), %%"#mm"3 \n\t" \
"por (%2), %%"#mm"0 \n\t" \
"paddsw %%"#mm"7, %%"#mm"5 \n\t" \
"psubsw %%"#mm"6, %%"#mm"7 \n\t" \
#mov" %%"#mm"0, 16(%3) \n\t" \
"paddsw %%"#mm"4, %%"#mm"5 \n\t" \
#mov" %%"#mm"7, 48(%3) \n\t" \
"psubsw %%"#mm"1, %%"#mm"3 \n\t" \
#mov" %%"#mm"5, 80(%3) \n\t" \
#mov" %%"#mm"3, 112(%3) \n\t" \
: \
: "r" (in + offset), "r" (fdct_tg_all_16), "r" (fdct_one_corr), \
"r" (out + offset), "r" (ocos_4_16)); \
}
/* fdct_col_mmx(): 8-byte MMX registers, moved with movq. */
FDCT_COL(mmx, mm, movq)
/* fdct_col_sse2(): 16-byte XMM registers, moved with movdqa (aligned). */
FDCT_COL(sse2, xmm, movdqa)
/**
 * Second (row) pass of the SSE2 forward DCT, handling all eight rows in one
 * asm statement.
 *
 * @param in   64 int16_t values; each row is read as two 8-byte halves at
 *             byte offsets i and i+8
 * @param out  64 int16_t values; each row is written with one 16-byte
 *             movdqa, so the pointer must be 16-byte aligned
 *
 * Operands: %0 = in, %1 = tab_frw_01234567_sse2 coefficients,
 * %2 = fdct_r_row_sse2 (added to every 32-bit sum before the shift),
 * %3 = SHIFT_FRW_ROW as an immediate, %4 = out.
 *
 * Rows are processed in pairs that share one 64-byte coefficient group:
 * byte offsets (0, 64), (16, 112), (32, 96) and (48, 80) use table offsets
 * 0, 64, 128 and 192 respectively.
 */
static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
{
__asm__ volatile(
/* H1: load the two halves of input row i into xmm2/xmm0 and all four
 * coefficient vectors of table group t into xmm4, xmm5, xmm3, xmm7. */
#define FDCT_ROW_SSE2_H1(i,t) \
"movq " #i "(%0), %%xmm2 \n\t" \
"movq " #i "+8(%0), %%xmm0 \n\t" \
"movdqa " #t "+32(%1), %%xmm3 \n\t" \
"movdqa " #t "+48(%1), %%xmm7 \n\t" \
"movdqa " #t "(%1), %%xmm4 \n\t" \
"movdqa " #t "+16(%1), %%xmm5 \n\t"
/* H2: same as H1 but only reloads xmm3/xmm7, which FDCT_ROW_SSE2
 * overwrites; xmm4/xmm5 are still valid from the preceding H1. */
#define FDCT_ROW_SSE2_H2(i,t) \
"movq " #i "(%0), %%xmm2 \n\t" \
"movq " #i "+8(%0), %%xmm0 \n\t" \
"movdqa " #t "+32(%1), %%xmm3 \n\t" \
"movdqa " #t "+48(%1), %%xmm7 \n\t"
/* Transform the row loaded by H1/H2: sum/difference of the first half
 * with the word-reversed second half, four pmaddwd products, add the
 * constant held in xmm6, shift right by %3, pack to int16_t with signed
 * saturation and store the 16-byte result at out + i. */
#define FDCT_ROW_SSE2(i) \
"movq %%xmm2, %%xmm1 \n\t" \
"pshuflw $27, %%xmm0, %%xmm0 \n\t" \
"paddsw %%xmm0, %%xmm1 \n\t" \
"psubsw %%xmm0, %%xmm2 \n\t" \
"punpckldq %%xmm2, %%xmm1 \n\t" \
"pshufd $78, %%xmm1, %%xmm2 \n\t" \
"pmaddwd %%xmm2, %%xmm3 \n\t" \
"pmaddwd %%xmm1, %%xmm7 \n\t" \
"pmaddwd %%xmm5, %%xmm2 \n\t" \
"pmaddwd %%xmm4, %%xmm1 \n\t" \
"paddd %%xmm7, %%xmm3 \n\t" \
"paddd %%xmm2, %%xmm1 \n\t" \
"paddd %%xmm6, %%xmm3 \n\t" \
"paddd %%xmm6, %%xmm1 \n\t" \
"psrad %3, %%xmm3 \n\t" \
"psrad %3, %%xmm1 \n\t" \
"packssdw %%xmm3, %%xmm1 \n\t" \
"movdqa %%xmm1, " #i "(%4) \n\t"
/* xmm6 keeps the constant from %2 for the whole function. */
"movdqa (%2), %%xmm6 \n\t"
FDCT_ROW_SSE2_H1(0,0)
FDCT_ROW_SSE2(0)
FDCT_ROW_SSE2_H2(64,0)
FDCT_ROW_SSE2(64)
FDCT_ROW_SSE2_H1(16,64)
FDCT_ROW_SSE2(16)
FDCT_ROW_SSE2_H2(112,64)
FDCT_ROW_SSE2(112)
FDCT_ROW_SSE2_H1(32,128)
FDCT_ROW_SSE2(32)
FDCT_ROW_SSE2_H2(96,128)
FDCT_ROW_SSE2(96)
FDCT_ROW_SSE2_H1(48,192)
FDCT_ROW_SSE2(48)
FDCT_ROW_SSE2_H2(80,192)
FDCT_ROW_SSE2(80)
:
: "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2),
"r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7")
);
}
/**
 * Row pass of the forward DCT for a single row, MMXEXT flavour.
 *
 * @param in     one row: 8 int16_t (bytes 0..15 are read)
 * @param out    one row: 8 int16_t (bytes 0..15 are written)
 * @param table  coefficients for this row; bytes 0..63 are read
 *
 * The second half of the row is word-reversed with pshufw (the instruction
 * that makes this the MMXEXT variant), combined with the first half by
 * saturating add/subtract, multiplied against the table with pmaddwd, and
 * the 32-bit sums get fdct_r_row added before being shifted right by
 * SHIFT_FRW_ROW and packed back to int16_t with signed saturation.
 *
 * NOTE(review): the asm declares no clobbers for mm0..mm7 or memory.
 */
static av_always_inline void fdct_row_mmxext(const int16_t *in, int16_t *out,
const int16_t *table)
{
__asm__ volatile (
"pshufw $0x1B, 8(%0), %%mm5 \n\t"
"movq (%0), %%mm0 \n\t"
"movq %%mm0, %%mm1 \n\t"
"paddsw %%mm5, %%mm0 \n\t"
"psubsw %%mm5, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"punpckldq %%mm1, %%mm0 \n\t"
"punpckhdq %%mm1, %%mm2 \n\t"
"movq (%1), %%mm1 \n\t"
"movq 8(%1), %%mm3 \n\t"
"movq 16(%1), %%mm4 \n\t"
"movq 24(%1), %%mm5 \n\t"
"movq 32(%1), %%mm6 \n\t"
"movq 40(%1), %%mm7 \n\t"
"pmaddwd %%mm0, %%mm1 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"pmaddwd %%mm0, %%mm4 \n\t"
"pmaddwd %%mm2, %%mm5 \n\t"
"pmaddwd %%mm0, %%mm6 \n\t"
"pmaddwd %%mm2, %%mm7 \n\t"
"pmaddwd 48(%1), %%mm0 \n\t"
"pmaddwd 56(%1), %%mm2 \n\t"
"paddd %%mm1, %%mm3 \n\t"
"paddd %%mm4, %%mm5 \n\t"
"paddd %%mm6, %%mm7 \n\t"
"paddd %%mm0, %%mm2 \n\t"
"movq (%2), %%mm0 \n\t"
"paddd %%mm0, %%mm3 \n\t"
"paddd %%mm0, %%mm5 \n\t"
"paddd %%mm0, %%mm7 \n\t"
"paddd %%mm0, %%mm2 \n\t"
"psrad $"S(SHIFT_FRW_ROW)", %%mm3 \n\t"
"psrad $"S(SHIFT_FRW_ROW)", %%mm5 \n\t"
"psrad $"S(SHIFT_FRW_ROW)", %%mm7 \n\t"
"psrad $"S(SHIFT_FRW_ROW)", %%mm2 \n\t"
"packssdw %%mm5, %%mm3 \n\t"
"packssdw %%mm2, %%mm7 \n\t"
"movq %%mm3, (%3) \n\t"
"movq %%mm7, 8(%3) \n\t"
:
: "r" (in), "r" (table), "r" (fdct_r_row), "r" (out));
}
/**
 * Row pass of the forward DCT for a single row, plain-MMX flavour.
 *
 * @param in     one row: 8 int16_t (bytes 0..15 are read)
 * @param out    one row: 8 int16_t (bytes 0..15 are written)
 * @param table  coefficients for this row; bytes 0..63 are read
 *
 * Same computation as fdct_row_mmxext(), except that the word-reversed
 * second half of the row is assembled with movd/punpcklwd/psrlq instead of
 * a single pshufw, which plain MMX does not provide.
 *
 * NOTE(review): the asm declares no clobbers for mm0..mm7 or memory.
 */
static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
{
//FIXME reorder (I do not have an old MMX-only CPU here to benchmark ...)
__asm__ volatile(
"movd 12(%0), %%mm1 \n\t"
"punpcklwd 8(%0), %%mm1 \n\t"
"movq %%mm1, %%mm2 \n\t"
"psrlq $0x20, %%mm1 \n\t"
"movq 0(%0), %%mm0 \n\t"
"punpcklwd %%mm2, %%mm1 \n\t"
"movq %%mm0, %%mm5 \n\t"
"paddsw %%mm1, %%mm0 \n\t"
"psubsw %%mm1, %%mm5 \n\t"
"movq %%mm0, %%mm2 \n\t"
"punpckldq %%mm5, %%mm0 \n\t"
"punpckhdq %%mm5, %%mm2 \n\t"
"movq 0(%1), %%mm1 \n\t"
"movq 8(%1), %%mm3 \n\t"
"movq 16(%1), %%mm4 \n\t"
"movq 24(%1), %%mm5 \n\t"
"movq 32(%1), %%mm6 \n\t"
"movq 40(%1), %%mm7 \n\t"
"pmaddwd %%mm0, %%mm1 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"pmaddwd %%mm0, %%mm4 \n\t"
"pmaddwd %%mm2, %%mm5 \n\t"
"pmaddwd %%mm0, %%mm6 \n\t"
"pmaddwd %%mm2, %%mm7 \n\t"
"pmaddwd 48(%1), %%mm0 \n\t"
"pmaddwd 56(%1), %%mm2 \n\t"
"paddd %%mm1, %%mm3 \n\t"
"paddd %%mm4, %%mm5 \n\t"
"paddd %%mm6, %%mm7 \n\t"
"paddd %%mm0, %%mm2 \n\t"
"movq (%2), %%mm0 \n\t"
"paddd %%mm0, %%mm3 \n\t"
"paddd %%mm0, %%mm5 \n\t"
"paddd %%mm0, %%mm7 \n\t"
"paddd %%mm0, %%mm2 \n\t"
"psrad $"S(SHIFT_FRW_ROW)", %%mm3 \n\t"
"psrad $"S(SHIFT_FRW_ROW)", %%mm5 \n\t"
"psrad $"S(SHIFT_FRW_ROW)", %%mm7 \n\t"
"psrad $"S(SHIFT_FRW_ROW)", %%mm2 \n\t"
"packssdw %%mm5, %%mm3 \n\t"
"packssdw %%mm2, %%mm7 \n\t"
"movq %%mm3, 0(%3) \n\t"
"movq %%mm7, 8(%3) \n\t"
:
: "r" (in), "r" (table), "r" (fdct_r_row), "r" (out));
}
/**
 * In-place forward DCT of an 8x8 block of int16_t coefficients (MMX).
 *
 * The column pass writes into a 128-byte scratch buffer (64 int16_t), four
 * columns per call; the row pass then transforms the scratch buffer back
 * into @p block one row at a time, advancing 32 table entries per row.
 */
void ff_fdct_mmx(int16_t *block)
{
    DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
    int16_t *const col_out = (int16_t *)align_tmp;
    int row;

    /* Columns 0..3, then columns 4..7. */
    fdct_col_mmx(block, col_out, 0);
    fdct_col_mmx(block, col_out, 4);

    for (row = 0; row < 8; row++)
        fdct_row_mmx(col_out + 8 * row, block + 8 * row,
                     tab_frw_01234567 + 32 * row);
}
#endif /* HAVE_MMX_INLINE */
#if HAVE_MMXEXT_INLINE
/**
 * In-place forward DCT of an 8x8 block of int16_t coefficients (MMXEXT).
 *
 * Uses the plain-MMX column pass (four columns per call) into a 128-byte
 * scratch buffer, followed by the MMXEXT row pass that writes each row back
 * into @p block, advancing 32 table entries per row.
 */
void ff_fdct_mmxext(int16_t *block)
{
    DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
    int16_t *const col_out = (int16_t *)align_tmp;
    int row;

    /* Columns 0..3, then columns 4..7. */
    fdct_col_mmx(block, col_out, 0);
    fdct_col_mmx(block, col_out, 4);

    for (row = 0; row < 8; row++)
        fdct_row_mmxext(col_out + 8 * row, block + 8 * row,
                        tab_frw_01234567 + 32 * row);
}
#endif /* HAVE_MMXEXT_INLINE */
#if HAVE_SSE2_INLINE
/**
 * In-place forward DCT of an 8x8 block of int16_t coefficients (SSE2).
 *
 * One column pass covers all eight columns and fills a 16-byte aligned
 * 128-byte scratch buffer; one row pass then writes the result back into
 * @p block.  Both passes access @p block with movdqa, so it has to be
 * 16-byte aligned.
 */
void ff_fdct_sse2(int16_t *block)
{
    DECLARE_ALIGNED(16, int64_t, align_tmp)[16];
    int16_t *const scratch = (int16_t *)align_tmp;

    fdct_col_sse2(block, scratch, 0); /* block   -> scratch */
    fdct_row_sse2(scratch, block);    /* scratch -> block   */
}
#endif /* HAVE_SSE2_INLINE */

View File

@ -0,0 +1,28 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_X86_FDCT_H
#define AVCODEC_X86_FDCT_H
#include <stdint.h>
/*
 * x86 forward DCT entry points.  Each one transforms the 64 int16_t
 * coefficients at block in place (column pass into a local scratch
 * buffer, then row pass back into block).
 */
/* Plain MMX column and row passes. */
void ff_fdct_mmx(int16_t *block);
/* MMX column pass, MMXEXT (pshufw) row pass. */
void ff_fdct_mmxext(int16_t *block);
/* SSE2 passes; block is accessed with movdqa and must be 16-byte aligned. */
void ff_fdct_sse2(int16_t *block);
#endif /* AVCODEC_X86_FDCT_H */

View File

@ -0,0 +1,44 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/fdctdsp.h"
#include "fdct.h"
/**
 * Install the fastest available x86 forward DCT into @p c.
 *
 * Nothing is changed for high bit depth content or when the user requested
 * a DCT algorithm other than FF_DCT_AUTO / FF_DCT_MMX.  Otherwise the
 * implementation is picked by CPU capability, preferring SSE2 over MMXEXT
 * over MMX.
 *
 * @param c               DSP context whose fdct pointer may be replaced
 * @param avctx           codec context supplying dct_algo
 * @param high_bit_depth  non-zero when samples are wider than 8 bits
 */
av_cold void ff_fdctdsp_init_x86(FDCTDSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    const int flags = av_get_cpu_flags();
    const int algo  = avctx->dct_algo;

    if (high_bit_depth)
        return;
    if (algo != FF_DCT_AUTO && algo != FF_DCT_MMX)
        return;

    /* Highest capability wins. */
    if (INLINE_SSE2(flags))
        c->fdct = ff_fdct_sse2;
    else if (INLINE_MMXEXT(flags))
        c->fdct = ff_fdct_mmxext;
    else if (INLINE_MMX(flags))
        c->fdct = ff_fdct_mmx;
}

View File

@ -0,0 +1,183 @@
;******************************************************************************
;* SIMD-optimized IDCT-related routines
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2003-2013 Michael Niedermayer
;* Copyright (c) 2013 Daniel Kang
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
SECTION_RODATA
cextern pb_80
SECTION .text
;--------------------------------------------------------------------------
;void ff_put_signed_pixels_clamped(const int16_t *block, uint8_t *pixels,
; ptrdiff_t line_size)
;--------------------------------------------------------------------------
; PUT_SIGNED_PIXELS_CLAMPED_HALF: emit four 8-pixel rows.
; %1 = byte offset into block
; Reads 64 bytes of int16 coefficients starting at blockq+%1, narrows them
; to bytes with signed saturation (packsswb), adds the bias vector held in
; m0 (loaded from pb_80 by the calling macro) and stores the four rows at
; pixelsq + {0, 1, 2, 3} * lsize.
; With 8-byte registers each row is packed from two loads (m1..m4); with
; 16-byte registers one register carries two rows, written out with
; movq (low half) and movhps (high half).
%macro PUT_SIGNED_PIXELS_CLAMPED_HALF 1
mova m1, [blockq+mmsize*0+%1]
mova m2, [blockq+mmsize*2+%1]
%if mmsize == 8
mova m3, [blockq+mmsize*4+%1]
mova m4, [blockq+mmsize*6+%1]
%endif
packsswb m1, [blockq+mmsize*1+%1]
packsswb m2, [blockq+mmsize*3+%1]
%if mmsize == 8
packsswb m3, [blockq+mmsize*5+%1]
packsswb m4, [blockq+mmsize*7+%1]
%endif
paddb m1, m0
paddb m2, m0
%if mmsize == 8
paddb m3, m0
paddb m4, m0
movq [pixelsq+lsizeq*0], m1
movq [pixelsq+lsizeq*1], m2
movq [pixelsq+lsizeq*2], m3
movq [pixelsq+lsize3q ], m4
%else
movq [pixelsq+lsizeq*0], m1
movhps [pixelsq+lsizeq*1], m1
movq [pixelsq+lsizeq*2], m2
movhps [pixelsq+lsize3q ], m2
%endif
%endmacro
; PUT_SIGNED_PIXELS_CLAMPED: define put_signed_pixels_clamped for the
; current INIT_* instruction set.
; %1 = vector register count passed to cglobal (0 for MMX, 3 for SSE2)
; Loads the bias vector pb_80 into m0, precomputes lsize3 = 3 * lsize, then
; emits rows 0..3 from block bytes 0..63 and, after advancing pixels by four
; lines, rows 4..7 from block bytes 64..127.
%macro PUT_SIGNED_PIXELS_CLAMPED 1
cglobal put_signed_pixels_clamped, 3, 4, %1, block, pixels, lsize, lsize3
mova m0, [pb_80]
lea lsize3q, [lsizeq*3]
PUT_SIGNED_PIXELS_CLAMPED_HALF 0
lea pixelsq, [pixelsq+lsizeq*4]
PUT_SIGNED_PIXELS_CLAMPED_HALF 64
RET
%endmacro
; Instantiate the MMX and SSE2 versions.
INIT_MMX mmx
PUT_SIGNED_PIXELS_CLAMPED 0
INIT_XMM sse2
PUT_SIGNED_PIXELS_CLAMPED 3
;--------------------------------------------------------------------------
; void ff_put_pixels_clamped(const int16_t *block, uint8_t *pixels,
; ptrdiff_t line_size);
;--------------------------------------------------------------------------
; %1 = block offset
; PUT_PIXELS_CLAMPED_HALF: emit four 8-pixel rows.
; Reads 64 bytes of int16 coefficients starting at blockq+%1, narrows them
; to bytes with unsigned saturation (packuswb) and stores the four rows at
; pixelsq + {0, 1, 2, 3} * lsize.
; With 8-byte registers each row is packed from two loads (m0..m3); with
; 16-byte registers one register carries two rows, written out with
; movq (low half) and movhps (high half).
%macro PUT_PIXELS_CLAMPED_HALF 1
mova m0, [blockq+mmsize*0+%1]
mova m1, [blockq+mmsize*2+%1]
%if mmsize == 8
mova m2, [blockq+mmsize*4+%1]
mova m3, [blockq+mmsize*6+%1]
%endif
packuswb m0, [blockq+mmsize*1+%1]
packuswb m1, [blockq+mmsize*3+%1]
%if mmsize == 8
packuswb m2, [blockq+mmsize*5+%1]
packuswb m3, [blockq+mmsize*7+%1]
movq [pixelsq], m0
movq [lsizeq+pixelsq], m1
movq [2*lsizeq+pixelsq], m2
movq [lsize3q+pixelsq], m3
%else
movq [pixelsq], m0
movhps [lsizeq+pixelsq], m0
movq [2*lsizeq+pixelsq], m1
movhps [lsize3q+pixelsq], m1
%endif
%endmacro
; PUT_PIXELS_CLAMPED: define put_pixels_clamped for the current INIT_*
; instruction set.  Precomputes lsize3 = 3 * lsize, emits rows 0..3 from
; block bytes 0..63 and, after advancing pixels by four lines, rows 4..7
; from block bytes 64..127.
%macro PUT_PIXELS_CLAMPED 0
cglobal put_pixels_clamped, 3, 4, 2, block, pixels, lsize, lsize3
lea lsize3q, [lsizeq*3]
PUT_PIXELS_CLAMPED_HALF 0
lea pixelsq, [pixelsq+lsizeq*4]
PUT_PIXELS_CLAMPED_HALF 64
RET
%endmacro
; Instantiate the MMX and SSE2 versions.
INIT_MMX mmx
PUT_PIXELS_CLAMPED
INIT_XMM sse2
PUT_PIXELS_CLAMPED
;--------------------------------------------------------------------------
; void ff_add_pixels_clamped(const int16_t *block, uint8_t *pixels,
; ptrdiff_t line_size);
;--------------------------------------------------------------------------
; %1 = block offset
; ADD_PIXELS_CLAMPED (one-argument form): process two 8-pixel rows.
; Loads 32 bytes of int16 coefficients from blockq+%1 and the two pixel rows
; at pixelsq and pixelsq+lsize, widens the pixel bytes to words by
; interleaving with m4 (which the calling macro has zeroed), adds
; coefficients and pixels with signed word saturation, narrows the sums back
; to bytes with unsigned saturation and writes both rows back in place.
; The 8-byte path needs two registers per row (m0/m1 and m5/m6, with m7 as
; scratch); the 16-byte path holds one row per register (m0, m1).
%macro ADD_PIXELS_CLAMPED 1
mova m0, [blockq+mmsize*0+%1]
mova m1, [blockq+mmsize*1+%1]
%if mmsize == 8
mova m5, [blockq+mmsize*2+%1]
mova m6, [blockq+mmsize*3+%1]
%endif
movq m2, [pixelsq]
movq m3, [pixelsq+lsizeq]
%if mmsize == 8
mova m7, m2
punpcklbw m2, m4
punpckhbw m7, m4
paddsw m0, m2
paddsw m1, m7
mova m7, m3
punpcklbw m3, m4
punpckhbw m7, m4
paddsw m5, m3
paddsw m6, m7
%else
punpcklbw m2, m4
punpcklbw m3, m4
paddsw m0, m2
paddsw m1, m3
%endif
packuswb m0, m1
%if mmsize == 8
packuswb m5, m6
movq [pixelsq], m0
movq [pixelsq+lsizeq], m5
%else
movq [pixelsq], m0
movhps [pixelsq+lsizeq], m0
%endif
%endmacro
; ADD_PIXELS_CLAMPED (zero-argument form; a separate macro from the
; one-argument form, selected by parameter count): define add_pixels_clamped
; for the current INIT_* instruction set.
; Zeroes m4 for the byte->word widening, then handles the eight rows two at
; a time (block byte offsets 0, 32, 64, 96), advancing pixels by two lines
; between steps.
%macro ADD_PIXELS_CLAMPED 0
cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize
pxor m4, m4
ADD_PIXELS_CLAMPED 0
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 32
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 64
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 96
RET
%endmacro
; Instantiate the MMX and SSE2 versions.
INIT_MMX mmx
ADD_PIXELS_CLAMPED
INIT_XMM sse2
ADD_PIXELS_CLAMPED

View File

@ -0,0 +1,39 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_X86_IDCTDSP_H
#define AVCODEC_X86_IDCTDSP_H
#include <stddef.h>
#include <stdint.h>
/*
 * Assembly helpers that combine an 8x8 block of int16_t coefficients
 * (64 values, 128 bytes) with an 8x8 area of 8-bit pixels whose rows are
 * line_size bytes apart.  The _mmx and _sse2 variants perform the same
 * operation with 8-byte and 16-byte registers respectively.
 */
/* pixels = clamp_to_uint8(pixels + block) */
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
void ff_add_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
/* pixels = clamp_to_uint8(block) */
void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
void ff_put_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
/* pixels = clamp_to_int8(block) + per-byte bias taken from pb_80 */
void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
void ff_put_signed_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
#endif /* AVCODEC_X86_IDCTDSP_H */

View File

@ -0,0 +1,162 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/idctdsp.h"
#include "idctdsp.h"
#include "simple_idct.h"
/* Input permutation for the simple_idct_mmx.
 * 64-entry lookup copied verbatim into the caller's idct_permutation[] when
 * FF_IDCT_PERM_SIMPLE is requested. */
static const uint8_t simple_mmx_permutation[64] = {
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
/* Column reordering used for FF_IDCT_PERM_SSE2: indexed by the low three
 * bits of a coefficient index, while bits 3..5 (the row) are kept as is. */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
/**
 * Fill @p idct_permutation for the permutation types that are specific to
 * the x86 IDCT implementations.
 *
 * @param idct_permutation  output array of 64 entries
 * @param perm_type         requested permutation
 * @return 1 if @p perm_type was handled here (FF_IDCT_PERM_SIMPLE or
 *         FF_IDCT_PERM_SSE2) and the array was written, 0 otherwise (the
 *         array is left untouched)
 */
av_cold int ff_init_scantable_permutation_x86(uint8_t *idct_permutation,
                                              enum idct_permutation_type perm_type)
{
    if (perm_type == FF_IDCT_PERM_SIMPLE) {
        int idx;

        /* Straight copy of the precomputed table. */
        for (idx = 0; idx < 64; idx++)
            idct_permutation[idx] = simple_mmx_permutation[idx];
        return 1;
    }

    if (perm_type == FF_IDCT_PERM_SSE2) {
        int row, col;

        /* Keep the row (bits 3..5), remap the column within the row. */
        for (row = 0; row < 64; row += 8)
            for (col = 0; col < 8; col++)
                idct_permutation[row + col] = row | idct_sse2_row_perm[col];
        return 1;
    }

    return 0;
}
av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_MMX(cpu_flags)) {
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
if (!high_bit_depth &&
avctx->lowres == 0 &&
(avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
c->idct_put = ff_simple_idct_put_mmx;
c->idct_add = ff_simple_idct_add_mmx;
c->idct = ff_simple_idct_mmx;
c->perm_type = FF_IDCT_PERM_SIMPLE;
}
}
if (EXTERNAL_SSE2(cpu_flags)) {
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
c->put_pixels_clamped = ff_put_pixels_clamped_sse2;
c->add_pixels_clamped = ff_add_pixels_clamped_sse2;
if (!high_bit_depth &&
avctx->lowres == 0 &&
(avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
c->idct_put = ff_simple_idct_put_sse2;
c->idct_add = ff_simple_idct_add_sse2;
c->perm_type = FF_IDCT_PERM_SIMPLE;
}
if (ARCH_X86_64 &&
!high_bit_depth &&
avctx->lowres == 0 &&
(avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEMMX ||
avctx->idct_algo == FF_IDCT_SIMPLE)) {
c->idct = ff_simple_idct8_sse2;
c->idct_put = ff_simple_idct8_put_sse2;
c->idct_add = ff_simple_idct8_add_sse2;
c->perm_type = FF_IDCT_PERM_TRANSPOSE;
}
}
if (ARCH_X86_64 && avctx->lowres == 0) {
if (EXTERNAL_AVX(cpu_flags) &&
!high_bit_depth &&
(avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEMMX ||
avctx->idct_algo == FF_IDCT_SIMPLE)) {
c->idct = ff_simple_idct8_avx;
c->idct_put = ff_simple_idct8_put_avx;
c->idct_add = ff_simple_idct8_add_avx;
c->perm_type = FF_IDCT_PERM_TRANSPOSE;
}
if (avctx->bits_per_raw_sample == 10 &&
avctx->codec_id != AV_CODEC_ID_MPEG4 &&
(avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
avctx->idct_algo == FF_IDCT_SIMPLE)) {
if (EXTERNAL_SSE2(cpu_flags)) {
c->idct_put = ff_simple_idct10_put_sse2;
c->idct_add = NULL;
c->idct = ff_simple_idct10_sse2;
c->perm_type = FF_IDCT_PERM_TRANSPOSE;
}
if (EXTERNAL_AVX(cpu_flags)) {
c->idct_put = ff_simple_idct10_put_avx;
c->idct_add = NULL;
c->idct = ff_simple_idct10_avx;
c->perm_type = FF_IDCT_PERM_TRANSPOSE;
}
}
if (avctx->bits_per_raw_sample == 12 &&
(avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
if (EXTERNAL_SSE2(cpu_flags)) {
c->idct_put = ff_simple_idct12_put_sse2;
c->idct_add = NULL;
c->idct = ff_simple_idct12_sse2;
c->perm_type = FF_IDCT_PERM_TRANSPOSE;
}
if (EXTERNAL_AVX(cpu_flags)) {
c->idct_put = ff_simple_idct12_put_avx;
c->idct_add = NULL;
c->idct = ff_simple_idct12_avx;
c->perm_type = FF_IDCT_PERM_TRANSPOSE;
}
}
}
}

View File

@ -0,0 +1,741 @@
;******************************************************************************
;* 36 point SSE-optimized IMDCT transform
;* Copyright (c) 2011 Vitor Sessak
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
SECTION_RODATA
; Lane masks applied with andps in imdct36_float (all-ones keeps a lane,
; zero clears it).
ps_mask: dd 0, ~0, ~0, ~0
ps_mask2: dd 0, ~0, 0, ~0
ps_mask3: dd 0, 0, 0, ~0
ps_mask4: dd 0, ~0, 0, 0
; Per-lane multipliers used by imdct36_float while building tmp[].
ps_val1: dd -0.5, -0.5, -0.8660254038, -0.8660254038
ps_val2: dd 1.0, 1.0, 0.8660254038, 0.8660254038
ps_val3: dd 0.1736481777, 0.1736481777, 0.3420201433, 0.3420201433
ps_val4: dd -0.7660444431, -0.7660444431, 0.8660254038, 0.8660254038
ps_val5: dd -0.9396926208, -0.9396926208, -0.9848077530, -0.9848077530
ps_val6: dd 0.5, 0.5, -0.6427876097, -0.6427876097
ps_val7: dd 1.0, 1.0, -0.6427876097, -0.6427876097
; Sign-bit masks: xorps with these negates the lanes holding 0x80000000.
ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000
ps_p1m1p1m1: dd 0, 0x80000000, 0, 0x80000000
; Multipliers for BUTTERF/BUTTERF2, one 16-byte row per call (byte offsets
; 0, 16, 32, 48, 64).  Used on the non-SSE3 path.
ps_cosh: dd 1.0, 0.50190991877167369479, 1.0, 5.73685662283492756461
dd 1.0, 0.51763809020504152469, 1.0, 1.93185165257813657349
dd 1.0, 0.55168895948124587824, -1.0, -1.18310079157624925896
dd 1.0, 0.61038729438072803416, -1.0, -0.87172339781054900991
dd 1.0, 0.70710678118654752439, 0.0, 0.0
; Same table with lanes 1 and 3 negated, for the SSE3 path that finishes
; with addsubps instead of xorps + addps.
ps_cosh_sse3: dd 1.0, -0.50190991877167369479, 1.0, -5.73685662283492756461
dd 1.0, -0.51763809020504152469, 1.0, -1.93185165257813657349
dd 1.0, -0.55168895948124587824, -1.0, 1.18310079157624925896
dd 1.0, -0.61038729438072803416, -1.0, 0.87172339781054900991
dd 1.0, -0.70710678118654752439, 0.0, 0.0
; Constants for four_imdct36_float, each replicated across all four lanes;
; entry k is addressed as costabs + 16*k (k = 0..17).
costabs: times 4 dd 0.98480773
times 4 dd 0.93969262
times 4 dd 0.86602539
times 4 dd -0.76604444
times 4 dd -0.64278764
times 4 dd 0.50000000
times 4 dd -0.50000000
times 4 dd -0.34202015
times 4 dd -0.17364818
times 4 dd 0.50190992
times 4 dd 0.51763808
times 4 dd 0.55168896
times 4 dd 0.61038726
times 4 dd 0.70710677
times 4 dd 0.87172341
times 4 dd 1.18310082
times 4 dd 1.93185163
times 4 dd 5.73685646
; Used as the output stride, in floats, by imdct36_float.
%define SBLIMIT 32
SECTION .text
; PSHUFD dst, src, imm8: permute the four dwords of src into dst.
; Uses the integer pshufd on SSE2..SSSE3 builds and shufps with src given
; twice otherwise (plain SSE and AVX).
%macro PSHUFD 3
%if cpuflag(sse2) && notcpuflag(avx)
pshufd %1, %2, %3
%else
shufps %1, %2, %2, %3
%endif
%endmacro
; input %2={x1,x2,x3,x4}, %3={y1,y2,y3,y4}
; output %1={x3,x4,y1,y2}
; (AVX: one shufps; otherwise movlhps/movhlps writing the two halves of %1)
%macro BUILDINVHIGHLOW 3
%if cpuflag(avx)
shufps %1, %2, %3, 0x4e
%else
movlhps %1, %3
movhlps %1, %2
%endif
%endmacro
; input %2={x1,x2,x3,x4}, %3={y1,y2,y3,y4}
; output %1={x4,y1,y2,y3}
; (SSSE3 and later: single palignr; otherwise BUILDINVHIGHLOW + shufps)
%macro ROTLEFT 3
%if cpuflag(ssse3)
palignr %1, %3, %2, 12
%else
BUILDINVHIGHLOW %1, %2, %3
shufps %1, %1, %3, 0x99
%endif
%endmacro
; INVERTHL dst, src: dst = src with its low and high 64-bit halves swapped.
%macro INVERTHL 2
%if cpuflag(sse2)
PSHUFD %1, %2, 0x4e
%else
movhlps %1, %2
movlhps %1, %2
%endif
%endmacro
; BUTTERF val, scratch, offset
; %1 = register transformed in place, %2 = scratch register (overwritten),
; %3 = byte offset of the multiplier row in ps_cosh / ps_cosh_sse3.
; Steps: %2 = %1 with halves swapped; negate the two high lanes of %1 and
; add %2; scale by the table row; then combine each lane with its
; neighbour (PSHUFD 0xb1 swaps lanes within each pair).  The SSE3 path uses
; the pre-negated table and addsubps, the other path xorps with
; ps_p1m1p1m1 followed by addps.
%macro BUTTERF 3
INVERTHL %2, %1
xorps %1, [ps_p1p1m1m1]
addps %1, %2
%if cpuflag(sse3)
mulps %1, %1, [ps_cosh_sse3 + %3]
PSHUFD %2, %1, 0xb1
addsubps %1, %1, %2
%else
mulps %1, [ps_cosh + %3]
PSHUFD %2, %1, 0xb1
xorps %1, [ps_p1m1p1m1]
addps %1, %2
%endif
%endmacro
; BUTTERF2 val, scratch, offset
; Same scale-and-combine tail as BUTTERF but without the initial half swap,
; and with shuffle 0xe1, which swaps only lanes 0 and 1.
%macro BUTTERF2 3
%if cpuflag(sse3)
mulps %1, %1, [ps_cosh_sse3 + %3]
PSHUFD %2, %1, 0xe1
addsubps %1, %1, %2
%else
mulps %1, [ps_cosh + %3]
PSHUFD %2, %1, 0xe1
xorps %1, [ps_p1m1p1m1]
addps %1, %2
%endif
%endmacro
; STORE src, scratch, base, stride
; Scatter the four floats of %1 to [%3], [%3+%4], [%3+2*%4], [%3+3*%4].
; SSE4 uses extractps and leaves %1 intact; the fallback path overwrites
; both %2 and %1 (shufps) while storing.
%macro STORE 4
%if cpuflag(sse4)
movss [%3 ], %1
extractps dword [%3 + %4], %1, 1
extractps dword [%3 + 2*%4], %1, 2
extractps dword [%3 + 3*%4], %1, 3
%else
movhlps %2, %1
movss [%3 ], %1
movss [%3 + 2*%4], %2
shufps %1, %1, 0xb1
movss [%3 + %4], %1
movhlps %2, %1
movss [%3 + 3*%4], %2
%endif
%endmacro
; LOAD dst, scratch, base, stride
; Gather the floats at [%3], [%3+%4], [%3+2*%4], [%3+3*%4] into %1.
; Eight bytes are read at each address; shufps 0x88 keeps only the first
; float of every pair.  %2 is overwritten.
%macro LOAD 4
movlps %1, [%3 ]
movhps %1, [%3 + %4]
movlps %2, [%3 + 2*%4]
movhps %2, [%3 + 3*%4]
shufps %1, %2, 0x88
%endmacro
; LOADA64 dst, addr
; Load 16 bytes from addr: one movu on AVX, otherwise two 8-byte halves
; with movlps/movhps.
%macro LOADA64 2
%if cpuflag(avx)
movu %1, [%2]
%else
movlps %1, [%2]
movhps %1, [%2 + 8]
%endif
%endmacro
; DEFINE_IMDCT: emit imdct36_float for the current INIT_XMM instruction set.
; Arguments (see cglobal): out, buf, in, win; 4 GPRs and 9 vector registers
; are requested (m8 is only used when ARCH_X86_64).
;   in  - 18 floats are read (four 16-byte loads plus 8 bytes at in+64)
;   win - multipliers applied to the results, read at winq + 4*k
;   buf - values read at a stride of 4 floats and added to the first group
;         of windowed results; the second group of windowed results is
;         written back to the same positions
;   out - results written at a stride of SBLIMIT floats
; NOTE(review): the roles above are read off the addressing in this macro;
; the exact sample ordering follows the permutation comments further down.
%macro DEFINE_IMDCT 0
cglobal imdct36_float, 4,4,9, out, buf, in, win
; for(i=17;i>=1;i--) in[i] += in[i-1];
LOADA64 m0, inq
LOADA64 m1, inq + 16
ROTLEFT m5, m0, m1
PSHUFD m6, m0, 0x93
andps m6, m6, [ps_mask]
addps m0, m0, m6
LOADA64 m2, inq + 32
ROTLEFT m7, m1, m2
addps m1, m1, m5
LOADA64 m3, inq + 48
ROTLEFT m5, m2, m3
xorps m4, m4, m4
movlps m4, [inq+64]
BUILDINVHIGHLOW m6, m3, m4
shufps m6, m6, m4, 0xa9
addps m4, m4, m6
addps m2, m2, m7
addps m3, m3, m5
; for(i=17;i>=3;i-=2) in[i] += in[i-2];
movlhps m5, m5, m0
andps m5, m5, [ps_mask3]
BUILDINVHIGHLOW m7, m0, m1
andps m7, m7, [ps_mask2]
addps m0, m0, m5
BUILDINVHIGHLOW m6, m1, m2
andps m6, m6, [ps_mask2]
addps m1, m1, m7
BUILDINVHIGHLOW m7, m2, m3
andps m7, m7, [ps_mask2]
addps m2, m2, m6
movhlps m6, m6, m3
andps m6, m6, [ps_mask4]
addps m3, m3, m7
addps m4, m4, m6
; Populate tmp[]
movlhps m6, m1, m5 ; zero out high values
subps m6, m6, m4
subps m5, m0, m3
%if ARCH_X86_64
SWAP m5, m8
%endif
mulps m7, m2, [ps_val1]
%if ARCH_X86_64
mulps m5, m8, [ps_val2]
%else
mulps m5, m5, [ps_val2]
%endif
addps m7, m7, m5
mulps m5, m6, [ps_val1]
subps m7, m7, m5
%if ARCH_X86_64
SWAP m5, m8
%else
subps m5, m0, m3
%endif
subps m5, m5, m6
addps m5, m5, m2
shufps m6, m4, m3, 0xe4
subps m6, m6, m2
mulps m6, m6, [ps_val3]
addps m4, m4, m1
mulps m4, m4, [ps_val4]
shufps m1, m1, m0, 0xe4
addps m1, m1, m2
mulps m1, m1, [ps_val5]
mulps m3, m3, [ps_val6]
mulps m0, m0, [ps_val7]
addps m0, m0, m3
xorps m2, m1, [ps_p1p1m1m1]
subps m2, m2, m4
addps m2, m2, m0
addps m3, m4, m0
subps m3, m3, m6
xorps m3, m3, [ps_p1p1m1m1]
shufps m0, m0, m4, 0xe4
subps m0, m0, m1
addps m0, m0, m6
BUILDINVHIGHLOW m4, m2, m3
shufps m3, m3, m2, 0x4e
; we have tmp = {SwAPLH(m0), SwAPLH(m7), m3, m4, m5}
BUTTERF m0, m1, 0
BUTTERF m7, m2, 16
BUTTERF m3, m6, 32
BUTTERF m4, m1, 48
BUTTERF2 m5, m1, 64
; permutates:
; m0 0 1 2 3 => 2 6 10 14 m1
; m7 4 5 6 7 => 3 7 11 15 m2
; m3 8 9 10 11 => 17 13 9 5 m3
; m4 12 13 14 15 => 16 12 8 4 m5
; m5 16 17 xx xx => 0 1 xx xx m0
unpckhps m1, m0, m7
unpckhps m6, m3, m4
movhlps m2, m6, m1
movlhps m1, m1, m6
unpcklps m5, m5, m4
unpcklps m3, m3, m7
movhlps m4, m3, m5
movlhps m5, m5, m3
SWAP m4, m3
; permutation done
; Window with win, add the buf values and store to out.
PSHUFD m6, m2, 0xb1
movss m4, [bufq + 4*68]
movss m7, [bufq + 4*64]
unpcklps m7, m7, m4
mulps m6, m6, [winq + 16*4]
addps m6, m6, m7
movss [outq + 64*SBLIMIT], m6
shufps m6, m6, m6, 0xb1
movss [outq + 68*SBLIMIT], m6
mulps m6, m3, [winq + 4*4]
LOAD m4, m7, bufq + 4*16, 16
addps m6, m6, m4
STORE m6, m7, outq + 16*SBLIMIT, 4*SBLIMIT
shufps m4, m0, m3, 0xb5
mulps m4, m4, [winq + 8*4]
LOAD m7, m6, bufq + 4*32, 16
addps m4, m4, m7
STORE m4, m6, outq + 32*SBLIMIT, 4*SBLIMIT
shufps m3, m3, m2, 0xb1
mulps m3, m3, [winq + 12*4]
LOAD m7, m6, bufq + 4*48, 16
addps m3, m3, m7
STORE m3, m7, outq + 48*SBLIMIT, 4*SBLIMIT
mulps m2, m2, [winq]
LOAD m6, m7, bufq, 16
addps m2, m2, m6
STORE m2, m7, outq, 4*SBLIMIT
; Window the remaining results and write them back to buf.
mulps m4, m1, [winq + 20*4]
STORE m4, m7, bufq, 16
mulps m3, m5, [winq + 24*4]
STORE m3, m7, bufq + 4*16, 16
shufps m0, m0, m5, 0xb0
mulps m0, m0, [winq + 28*4]
STORE m0, m7, bufq + 4*32, 16
shufps m5, m5, m1, 0xb1
mulps m5, m5, [winq + 32*4]
STORE m5, m7, bufq + 4*48, 16
shufps m1, m1, m1, 0xb1
mulps m1, m1, [winq + 36*4]
movss [bufq + 4*64], m1
shufps m1, m1, 0xb1
movss [bufq + 4*68], m1
RET
%endmacro
; Instantiations: plain SSE only on 32-bit x86; SSE2, SSE3 and SSSE3
; always; AVX only when the assembler supports it.
%if ARCH_X86_32
INIT_XMM sse
DEFINE_IMDCT
%endif
INIT_XMM sse2
DEFINE_IMDCT
INIT_XMM sse3
DEFINE_IMDCT
INIT_XMM ssse3
DEFINE_IMDCT
%if HAVE_AVX_EXTERNAL
INIT_XMM avx
DEFINE_IMDCT
%endif
; Register-spill helpers for four_imdct36_float, which needs more than the
; eight vector registers available on 32-bit x86.
;   x86-64: SPILL/UNSPILL are plain SWAPs (register renaming) and
;           SPILLED(x) names register m<x>; nothing touches memory.
;   x86-32: SPILL n, x stores m<n> to slot x in the tmp buffer,
;           UNSPILL n, x reloads it, and SPILLED(x) is the slot's address
;           tmpq + 32*4 + (x-8)*16.
INIT_XMM sse
%if ARCH_X86_64
%define SPILL SWAP
%define UNSPILL SWAP
%define SPILLED(x) m %+ x
%else
%define SPILLED(x) [tmpq+(x-8)*16 + 32*4]
%macro SPILL 2 ; xmm#, mempos
movaps SPILLED(%2), m%1
%endmacro
%macro UNSPILL 2
movaps m%1, SPILLED(%2)
%endmacro
%endif
; DEFINE_FOUR_IMDCT: emit four_imdct36_float for the current INIT_XMM
; instruction set.  It processes four inputs at once, one per vector lane.
; Arguments (see cglobal): out, buf, in, win, tmp; 16 vector registers are
; requested.
;   in  - four inputs spaced 72 bytes (18 floats) apart; each group of four
;         samples is loaded from all four inputs and transposed so that one
;         register holds the same sample of every input.  Inputs 0 and 2 are
;         read with mova, inputs 1 and 3 with movu.
;   win - multipliers, read as 16-byte vectors at winq + 4*k
;   buf - 16-byte vectors at bufq + 4*k; added to the first set of windowed
;         results and overwritten with the second set
;   out - 16-byte result vectors stored at multiples of 128 bytes
;   tmp - scratch; offsets 0..4*28 are used directly and, on 32-bit x86,
;         the SPILL slots occupy tmpq + 128 .. tmpq + 255
; costabs + 16*k supplies constants broadcast to all four lanes.
%macro DEFINE_FOUR_IMDCT 0
cglobal four_imdct36_float, 5,5,16, out, buf, in, win, tmp
; Load and transpose the inputs, accumulating neighbouring samples.
movlps m0, [inq+64]
movhps m0, [inq+64 + 72]
movlps m3, [inq+64 + 2*72]
movhps m3, [inq+64 + 3*72]
shufps m5, m0, m3, 0xdd
shufps m0, m0, m3, 0x88
mova m1, [inq+48]
movu m6, [inq+48 + 72]
mova m7, [inq+48 + 2*72]
movu m3, [inq+48 + 3*72]
TRANSPOSE4x4PS 1, 6, 7, 3, 4
addps m4, m6, m7
mova [tmpq+4*28], m4
addps m7, m3
addps m6, m1
addps m3, m0
addps m0, m5
addps m0, m7
addps m7, m6
mova [tmpq+4*12], m7
SPILL 3, 12
mova m4, [inq+32]
movu m5, [inq+32 + 72]
mova m2, [inq+32 + 2*72]
movu m7, [inq+32 + 3*72]
TRANSPOSE4x4PS 4, 5, 2, 7, 3
addps m1, m7
SPILL 1, 11
addps m3, m5, m2
SPILL 3, 13
addps m7, m2
addps m5, m4
addps m6, m7
mova [tmpq], m6
addps m7, m5
mova [tmpq+4*16], m7
mova m2, [inq+16]
movu m7, [inq+16 + 72]
mova m1, [inq+16 + 2*72]
movu m6, [inq+16 + 3*72]
TRANSPOSE4x4PS 2, 7, 1, 6, 3
addps m4, m6
addps m6, m1
addps m1, m7
addps m7, m2
addps m5, m6
SPILL 5, 15
addps m6, m7
mulps m6, [costabs + 16*2]
mova [tmpq+4*8], m6
SPILL 1, 10
SPILL 0, 14
mova m1, [inq]
movu m6, [inq + 72]
mova m3, [inq + 2*72]
movu m5, [inq + 3*72]
TRANSPOSE4x4PS 1, 6, 3, 5, 0
addps m2, m5
addps m5, m3
addps m7, m5
addps m3, m6
addps m6, m1
SPILL 7, 8
addps m5, m6
SPILL 6, 9
; Intermediate terms, kept in registers, spill slots and tmp[].
addps m6, m4, SPILLED(12)
subps m6, m2
UNSPILL 7, 11
SPILL 5, 11
subps m5, m1, m7
mulps m7, [costabs + 16*5]
addps m7, m1
mulps m0, m6, [costabs + 16*6]
addps m0, m5
mova [tmpq+4*24], m0
addps m6, m5
mova [tmpq+4*4], m6
addps m6, m4, m2
mulps m6, [costabs + 16*1]
subps m4, SPILLED(12)
mulps m4, [costabs + 16*8]
addps m2, SPILLED(12)
mulps m2, [costabs + 16*3]
subps m5, m7, m6
subps m5, m2
addps m6, m7
addps m6, m4
addps m7, m2
subps m7, m4
mova [tmpq+4*20], m7
mova m2, [tmpq+4*28]
mova [tmpq+4*28], m5
UNSPILL 7, 13
subps m5, m7, m2
mulps m5, [costabs + 16*7]
UNSPILL 1, 10
mulps m1, [costabs + 16*2]
addps m4, m3, m2
mulps m4, [costabs + 16*4]
addps m2, m7
addps m7, m3
mulps m7, [costabs]
subps m3, m2
mulps m3, [costabs + 16*2]
addps m2, m7, m5
addps m2, m1
SPILL 2, 10
addps m7, m4
subps m7, m1
SPILL 7, 12
subps m5, m4
subps m5, m1
UNSPILL 0, 14
SPILL 5, 13
addps m1, m0, SPILLED(15)
subps m1, SPILLED(8)
mova m4, [costabs + 16*5]
mulps m4, [tmpq]
UNSPILL 2, 9
addps m4, m2
subps m2, [tmpq]
mulps m5, m1, [costabs + 16*6]
addps m5, m2
SPILL 5, 9
addps m2, m1
SPILL 2, 14
UNSPILL 5, 15
subps m7, m5, m0
addps m5, SPILLED(8)
mulps m5, [costabs + 16*1]
mulps m7, [costabs + 16*8]
addps m0, SPILLED(8)
mulps m0, [costabs + 16*3]
subps m2, m4, m5
subps m2, m0
SPILL 2, 15
addps m5, m4
addps m5, m7
addps m4, m0
subps m4, m7
SPILL 4, 8
mova m7, [tmpq+4*16]
mova m2, [tmpq+4*12]
addps m0, m7, m2
subps m0, SPILLED(11)
mulps m0, [costabs + 16*2]
addps m4, m7, SPILLED(11)
mulps m4, [costabs]
subps m7, m2
mulps m7, [costabs + 16*7]
addps m2, SPILLED(11)
mulps m2, [costabs + 16*4]
addps m1, m7, [tmpq+4*8]
addps m1, m4
addps m4, m2
subps m4, [tmpq+4*8]
SPILL 4, 11
subps m7, m2
subps m7, [tmpq+4*8]
; Output stage: each group below scales a sum/difference pair by two
; costabs entries, then windows with win, adds buf and stores to out, and
; writes the complementary windowed values back to buf.
addps m4, m6, SPILLED(10)
subps m6, SPILLED(10)
addps m2, m5, m1
mulps m2, [costabs + 16*9]
subps m5, m1
mulps m5, [costabs + 16*17]
subps m1, m4, m2
addps m4, m2
mulps m2, m1, [winq+4*36]
addps m2, [bufq+4*36]
mova [outq+1152], m2
mulps m1, [winq+4*32]
addps m1, [bufq+4*32]
mova [outq+1024], m1
mulps m1, m4, [winq+4*116]
mova [bufq+4*36], m1
mulps m4, [winq+4*112]
mova [bufq+4*32], m4
addps m2, m6, m5
subps m6, m5
mulps m1, m6, [winq+4*68]
addps m1, [bufq+4*68]
mova [outq+2176], m1
mulps m6, [winq]
addps m6, [bufq]
mova [outq], m6
mulps m1, m2, [winq+4*148]
mova [bufq+4*68], m1
mulps m2, [winq+4*80]
mova [bufq], m2
addps m5, m3, [tmpq+4*24]
mova m2, [tmpq+4*24]
subps m2, m3
mova m1, SPILLED(9)
subps m1, m0
mulps m1, [costabs + 16*10]
addps m0, SPILLED(9)
mulps m0, [costabs + 16*16]
addps m6, m5, m1
subps m5, m1
mulps m3, m5, [winq+4*40]
addps m3, [bufq+4*40]
mova [outq+1280], m3
mulps m5, [winq+4*28]
addps m5, [bufq+4*28]
mova [outq+896], m5
mulps m1, m6, [winq+4*120]
mova [bufq+4*40], m1
mulps m6, [winq+4*108]
mova [bufq+4*28], m6
addps m1, m2, m0
subps m2, m0
mulps m5, m2, [winq+4*64]
addps m5, [bufq+4*64]
mova [outq+2048], m5
mulps m2, [winq+4*4]
addps m2, [bufq+4*4]
mova [outq+128], m2
mulps m0, m1, [winq+4*144]
mova [bufq+4*64], m0
mulps m1, [winq+4*84]
mova [bufq+4*4], m1
mova m1, [tmpq+4*28]
mova m5, m1
addps m1, SPILLED(13)
subps m5, SPILLED(13)
UNSPILL 3, 15
addps m2, m7, m3
mulps m2, [costabs + 16*11]
subps m3, m7
mulps m3, [costabs + 16*15]
addps m0, m2, m1
subps m1, m2
SWAP m0, m2
mulps m6, m1, [winq+4*44]
addps m6, [bufq+4*44]
mova [outq+1408], m6
mulps m1, [winq+4*24]
addps m1, [bufq+4*24]
mova [outq+768], m1
mulps m0, m2, [winq+4*124]
mova [bufq+4*44], m0
mulps m2, [winq+4*104]
mova [bufq+4*24], m2
addps m0, m5, m3
subps m5, m3
mulps m1, m5, [winq+4*60]
addps m1, [bufq+4*60]
mova [outq+1920], m1
mulps m5, [winq+4*8]
addps m5, [bufq+4*8]
mova [outq+256], m5
mulps m1, m0, [winq+4*140]
mova [bufq+4*60], m1
mulps m0, [winq+4*88]
mova [bufq+4*8], m0
mova m1, [tmpq+4*20]
addps m1, SPILLED(12)
mova m2, [tmpq+4*20]
subps m2, SPILLED(12)
UNSPILL 7, 8
subps m0, m7, SPILLED(11)
addps m7, SPILLED(11)
mulps m4, m7, [costabs + 16*12]
mulps m0, [costabs + 16*14]
addps m5, m1, m4
subps m1, m4
mulps m7, m1, [winq+4*48]
addps m7, [bufq+4*48]
mova [outq+1536], m7
mulps m1, [winq+4*20]
addps m1, [bufq+4*20]
mova [outq+640], m1
mulps m1, m5, [winq+4*128]
mova [bufq+4*48], m1
mulps m5, [winq+4*100]
mova [bufq+4*20], m5
addps m6, m2, m0
subps m2, m0
mulps m1, m2, [winq+4*56]
addps m1, [bufq+4*56]
mova [outq+1792], m1
mulps m2, [winq+4*12]
addps m2, [bufq+4*12]
mova [outq+384], m2
mulps m0, m6, [winq+4*136]
mova [bufq+4*56], m0
mulps m6, [winq+4*92]
mova [bufq+4*12], m6
UNSPILL 0, 14
mulps m0, [costabs + 16*13]
mova m3, [tmpq+4*4]
addps m2, m0, m3
subps m3, m0
mulps m0, m3, [winq+4*52]
addps m0, [bufq+4*52]
mova [outq+1664], m0
mulps m3, [winq+4*16]
addps m3, [bufq+4*16]
mova [outq+512], m3
mulps m0, m2, [winq+4*132]
mova [bufq+4*52], m0
mulps m2, [winq+4*96]
mova [bufq+4*16], m2
RET
%endmacro
; Instantiations: SSE always, AVX only when the assembler supports it.
INIT_XMM sse
DEFINE_FOUR_IMDCT
%if HAVE_AVX_EXTERNAL
INIT_XMM avx
DEFINE_FOUR_IMDCT
%endif

View File

@ -6,11 +6,21 @@
SOURCES += [
'constants.c',
'dct32.asm',
'dct_init.c',
'fdct.c',
'fdctdsp_init.c',
'flacdsp.asm',
'flacdsp_init.c',
'h264_intrapred.asm',
'h264_intrapred_10bit.asm',
'h264_intrapred_init.c',
'idctdsp.asm',
'idctdsp_init.c',
'imdct36.asm',
'mpegaudiodsp.c',
'simple_idct.asm',
'simple_idct10.asm',
'videodsp.asm',
'videodsp_init.c',
'vp8dsp.asm',

View File

@ -0,0 +1,289 @@
/*
* SIMD-optimized MP3 decoding functions
* Copyright (c) 2010 Vitor Sessak
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/internal.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/mpegaudiodsp.h"
/* For one CPU suffix, forward-declare the file-local block driver
 * imdct36_blocks_<CPU>() and the external single-block routine
 * ff_imdct36_float_<CPU>() that the driver calls. */
#define DECL(CPU)\
static void imdct36_blocks_ ## CPU(float *out, float *buf, float *in, int count, int switch_point, int block_type);\
void ff_imdct36_float_ ## CPU(float *out, float *buf, float *in, float *win);
/* Declarations for every CPU flavour dispatched in ff_mpadsp_init_x86().
 * The plain "sse" flavour is only declared on 32-bit x86. */
#if HAVE_X86ASM
#if ARCH_X86_32
DECL(sse)
#endif
DECL(sse2)
DECL(sse3)
DECL(ssse3)
DECL(avx)
#endif /* HAVE_X86ASM */
/* Four-block IMDCT36 routines; they take an extra scratch buffer (tmpbuf),
 * for which the callers in this file pass a 16-byte aligned float[1024]. */
void ff_four_imdct36_float_sse(float *out, float *buf, float *in, float *win,
float *tmpbuf);
void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win,
float *tmpbuf);
/* Interleaved window tables for the four-block routines, filled in by
 * ff_mpadsp_init_x86().  Indexed as [switch_point && j < 4][block_type];
 * each table holds 40 groups of 4 coefficients. */
DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40];
#if HAVE_6REGS && HAVE_SSE_INLINE
/* Multiply-accumulate: acc += x * y. */
#define MACS(acc, x, y) acc += (x) * (y)
/* Multiply-subtract: acc -= x * y. */
#define MLSS(acc, x, y) acc -= (x) * (y)
/* Apply op(acc, coef[k], smp[k]) for the eight taps k = 0, 64, ..., 448,
 * in ascending order.  op is MACS or MLSS; acc is evaluated once per tap. */
#define SUM8(op, acc, coef, smp)            \
do {                                        \
    op(acc, (coef)[  0], (smp)[  0]);       \
    op(acc, (coef)[ 64], (smp)[ 64]);       \
    op(acc, (coef)[128], (smp)[128]);       \
    op(acc, (coef)[192], (smp)[192]);       \
    op(acc, (coef)[256], (smp)[256]);       \
    op(acc, (coef)[320], (smp)[320]);       \
    op(acc, (coef)[384], (smp)[384]);       \
    op(acc, (coef)[448], (smp)[448]);       \
} while (0)
/**
 * Compute two negated 8-tap dot products per output sample, four samples
 * per loop iteration.
 *
 * For every i in [0, len):
 *   sum1[i] = -(sum over k in [0, 8) of win1[i + 64*k] * buf[i + 64*k])
 *   sum2[i] = -(sum over k in [0, 8) of win2[i + 16*k] * buf[i + 64*k])
 *
 * All accesses use movaps, so buf, win1, win2, sum1 and sum2 must be
 * 16-byte aligned.  The loop body runs before its exit test and steps by
 * four floats, so len must be a positive multiple of 4.
 *
 * @param buf   input samples
 * @param win1  coefficients for sum1, tap stride 64 floats
 * @param win2  coefficients for sum2, tap stride 16 floats
 * @param sum1  receives len results
 * @param sum2  receives len results
 * @param len   number of results to produce
 */
static void apply_window(const float *buf, const float *win1,
                         const float *win2, float *sum1, float *sum2, int len)
{
    /* Negative byte offset counting up to zero; every base pointer is
     * pre-advanced by len so that (base, count) addresses element 0 first. */
    x86_reg count = - 4*len;
    const float *win1a = win1+len;
    const float *win2a = win2+len;
    const float *bufa  = buf+len;
    float *sum1a = sum1+len;
    float *sum2a = sum2+len;

/* One tap: a is the byte offset into win1/buf, b the byte offset into win2.
 * xmm0 accumulates -win1*buf, xmm4 accumulates -win2*buf. */
#define MULT(a, b)                                 \
    "movaps " #a "(%1,%0), %%xmm1           \n\t"  \
    "movaps " #a "(%3,%0), %%xmm2           \n\t"  \
    "mulps         %%xmm2, %%xmm1           \n\t"  \
    "subps         %%xmm1, %%xmm0           \n\t"  \
    "mulps  " #b "(%2,%0), %%xmm2           \n\t"  \
    "subps         %%xmm2, %%xmm4           \n\t"

    __asm__ volatile(
        "1:                                   \n\t"
        "xorps       %%xmm0, %%xmm0           \n\t"
        "xorps       %%xmm4, %%xmm4           \n\t"

        MULT(   0,   0)
        MULT( 256,  64)
        MULT( 512, 128)
        MULT( 768, 192)
        MULT(1024, 256)
        MULT(1280, 320)
        MULT(1536, 384)
        MULT(1792, 448)

        "movaps      %%xmm0, (%4,%0)          \n\t"
        "movaps      %%xmm4, (%5,%0)          \n\t"
        "add            $16,  %0              \n\t"
        "jl 1b                                \n\t"
        :"+&r"(count)
        :"r"(win1a), "r"(win2a), "r"(bufa), "r"(sum1a), "r"(sum2a)
        /* The asm reads and writes memory reachable only through "r"
         * operands; without this clobber the compiler may assume sum1/sum2
         * are unchanged once this function is inlined. */
        :"memory"
        /* NOTE(review): xmm0, xmm1, xmm2 and xmm4 are modified but not listed
         * as clobbers, matching the other asm statements in this file. */
    );

#undef MULT
}
/**
 * Windowing step that produces 32 output samples, written to
 * out[0], out[incr], ..., out[31 * incr].
 *
 * @param in      input buffer; its first 32 floats are copied to
 *                in[512..543] before use, so the buffer must be writable
 *                there and 16-byte aligned (movaps)
 * @param win     window coefficients, read through apply_window() and SUM8
 * @param unused  not referenced
 * @param out     destination; must be 16-byte aligned when incr == 1
 *                because that path stores with movaps
 * @param incr    distance in floats between consecutive output samples
 */
static void apply_window_mp3(float *in, float *win, int *unused, float *out,
ptrdiff_t incr)
{
/* Partial sums; apply_window() fills elements 0..15 of each array. */
LOCAL_ALIGNED_16(float, suma, [17]);
LOCAL_ALIGNED_16(float, sumb, [17]);
LOCAL_ALIGNED_16(float, sumc, [17]);
LOCAL_ALIGNED_16(float, sumd, [17]);
float sum;
/* copy to avoid wrap: duplicate in[0..31] (128 bytes) at in[512..543] */
__asm__ volatile(
"movaps 0(%0), %%xmm0 \n\t" \
"movaps 16(%0), %%xmm1 \n\t" \
"movaps 32(%0), %%xmm2 \n\t" \
"movaps 48(%0), %%xmm3 \n\t" \
"movaps %%xmm0, 0(%1) \n\t" \
"movaps %%xmm1, 16(%1) \n\t" \
"movaps %%xmm2, 32(%1) \n\t" \
"movaps %%xmm3, 48(%1) \n\t" \
"movaps 64(%0), %%xmm0 \n\t" \
"movaps 80(%0), %%xmm1 \n\t" \
"movaps 96(%0), %%xmm2 \n\t" \
"movaps 112(%0), %%xmm3 \n\t" \
"movaps %%xmm0, 64(%1) \n\t" \
"movaps %%xmm1, 80(%1) \n\t" \
"movaps %%xmm2, 96(%1) \n\t" \
"movaps %%xmm3, 112(%1) \n\t"
::"r"(in), "r"(in+512)
:"memory"
);
/* Each call yields two arrays of 16 negated 8-tap sums. */
apply_window(in + 16, win , win + 512, suma, sumc, 16);
apply_window(in + 32, win + 48, win + 640, sumb, sumd, 16);
/* Extra contribution to suma[0] only. */
SUM8(MACS, suma[0], win + 32, in + 48);
/* sumd[16] is read by the first SUMS group below (the movups at byte
 * offset 52 covers elements 13..16), so it must be zero for out[0]. */
sumc[ 0] = 0;
sumb[16] = 0;
sumd[16] = 0;
/* SUMS: all six arguments are byte offsets.  Produces four ascending
 * outputs at out1 as reverse(sumd[..]) - suma[..], and four at out2 as
 * reverse(sumc[..]) + sumb[..]; shufps $0x1b reverses the four lanes. */
#define SUMS(suma, sumb, sumc, sumd, out1, out2) \
"movups " #sumd "(%4), %%xmm0 \n\t" \
"shufps $0x1b, %%xmm0, %%xmm0 \n\t" \
"subps " #suma "(%1), %%xmm0 \n\t" \
"movaps %%xmm0," #out1 "(%0) \n\t" \
\
"movups " #sumc "(%3), %%xmm0 \n\t" \
"shufps $0x1b, %%xmm0, %%xmm0 \n\t" \
"addps " #sumb "(%2), %%xmm0 \n\t" \
"movaps %%xmm0," #out2 "(%0) \n\t"
if (incr == 1) {
/* Contiguous output: write all 32 floats with vector stores.  out[16]
 * receives a placeholder here and is overwritten by *out = sum below. */
__asm__ volatile(
SUMS( 0, 48, 4, 52, 0, 112)
SUMS(16, 32, 20, 36, 16, 96)
SUMS(32, 16, 36, 20, 32, 80)
SUMS(48, 0, 52, 4, 48, 64)
:"+&r"(out)
:"r"(&suma[0]), "r"(&sumb[0]), "r"(&sumc[0]), "r"(&sumd[0])
:"memory"
);
out += 16*incr;
} else {
/* Strided output: same combination as SUMS, one sample at a time.
 * out walks upwards from sample 0, out2 walks downwards from sample 31. */
int j;
float *out2 = out + 32 * incr;
out[0 ] = -suma[ 0];
out += incr;
out2 -= incr;
for(j=1;j<16;j++) {
*out = -suma[ j] + sumd[16-j];
*out2 = sumb[16-j] + sumc[ j];
out += incr;
out2 -= incr;
}
}
/* Both paths leave out pointing at sample 16, which gets its own sum. */
sum = 0;
SUM8(MLSS, sum, win + 16 + 32, in + 32);
*out = sum;
}
#endif /* HAVE_6REGS && HAVE_SSE_INLINE */
#if HAVE_X86ASM
/* Define imdct36_blocks_<CPU1>(): run IMDCT36 over `count` blocks.
 *
 * Blocks are handled four at a time by ff_four_imdct36_float_<CPU2>() while
 * at least four remain (in and buf advance by 4*18 floats, out by 4); the
 * remaining count & 3 blocks go one by one through ff_imdct36_float_<CPU1>()
 * (in advances by 18 floats, buf and out by 1).
 *
 * The four-block path takes its window from mdct_win_sse; the single-block
 * path indexes ff_mdct_win_float directly, using row block_type (or 0 when
 * switch_point is set and j < 2), plus 4 for odd j. */
#define DECL_IMDCT_BLOCKS(CPU1, CPU2) \
static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in, \
int count, int switch_point, int block_type) \
{ \
int align_end = count - (count & 3); \
int j; \
for (j = 0; j < align_end; j+= 4) { \
LOCAL_ALIGNED_16(float, tmpbuf, [1024]); \
float *win = mdct_win_sse[switch_point && j < 4][block_type]; \
/* window selected above: table 1 only for the first group when */ \
\
/* switch_point is set; process four blocks in one call */ \
ff_four_imdct36_float_ ## CPU2(out, buf, in, win, tmpbuf); \
in += 4*18; \
buf += 4*18; \
out += 4; \
} \
for (; j < count; j++) { \
/* leftover blocks, one at a time */ \
\
/* select window */ \
int win_idx = (switch_point && j < 2) ? 0 : block_type; \
float *win = ff_mdct_win_float[win_idx + (4 & -(j & 1))]; \
\
ff_imdct36_float_ ## CPU1(out, buf, in, win); \
\
in += 18; \
buf++; \
out++; \
} \
}
/* Instantiate one block driver per CPU flavour.  Every SSE-family driver
 * uses its own single-block routine (first argument) but shares the SSE
 * four-block routine (second argument); only AVX has its own four-block
 * routine.  The plain "sse" driver is built on 32-bit x86 only. */
#if HAVE_SSE
#if ARCH_X86_32
DECL_IMDCT_BLOCKS(sse,sse)
#endif
DECL_IMDCT_BLOCKS(sse2,sse)
DECL_IMDCT_BLOCKS(sse3,sse)
DECL_IMDCT_BLOCKS(ssse3,sse)
#endif
#if HAVE_AVX_EXTERNAL
DECL_IMDCT_BLOCKS(avx,avx)
#endif
#endif /* HAVE_X86ASM */
/**
 * Install the x86 implementations supported by the running CPU into s and
 * build the interleaved window tables used by the four-block IMDCT36 code.
 *
 * @param s DSP context whose function pointers are overridden
 */
av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
{
    av_unused int cpu_flags = av_get_cpu_flags();
    int type, k;

    /* Each group of four floats interleaves rows `type` and `type + 4` of
     * ff_mdct_win_float.  Table 0 repeats that pair twice; table 1 (the one
     * imdct36_blocks_*() picks for the first four blocks when switch_point
     * is set) uses rows 0 and 4 for the first pair instead. */
    for (type = 0; type < 4; type++) {
        const float *cur_lo  = ff_mdct_win_float[type];
        const float *cur_hi  = ff_mdct_win_float[type + 4];
        const float *zero_lo = ff_mdct_win_float[0];
        const float *zero_hi = ff_mdct_win_float[4];
        float *same  = mdct_win_sse[0][type];
        float *mixed = mdct_win_sse[1][type];

        for (k = 0; k < 40; k++, same += 4, mixed += 4) {
            same[0]  = cur_lo[k];
            same[1]  = cur_hi[k];
            same[2]  = cur_lo[k];
            same[3]  = cur_hi[k];

            mixed[0] = zero_lo[k];
            mixed[1] = zero_hi[k];
            mixed[2] = cur_lo[k];
            mixed[3] = cur_hi[k];
        }
    }

#if HAVE_6REGS && HAVE_SSE_INLINE
    if (INLINE_SSE(cpu_flags))
        s->apply_window_float = apply_window_mp3;
#endif /* HAVE_6REGS && HAVE_SSE_INLINE */

    /* Checks run from the oldest to the newest instruction set so that the
     * last matching one wins. */
#if HAVE_X86ASM
#if HAVE_SSE
#if ARCH_X86_32
    if (EXTERNAL_SSE(cpu_flags))
        s->imdct36_blocks_float = imdct36_blocks_sse;
#endif
    if (EXTERNAL_SSE2(cpu_flags))
        s->imdct36_blocks_float = imdct36_blocks_sse2;
    if (EXTERNAL_SSE3(cpu_flags))
        s->imdct36_blocks_float = imdct36_blocks_sse3;
    if (EXTERNAL_SSSE3(cpu_flags))
        s->imdct36_blocks_float = imdct36_blocks_ssse3;
#endif
#if HAVE_AVX_EXTERNAL
    if (EXTERNAL_AVX(cpu_flags))
        s->imdct36_blocks_float = imdct36_blocks_avx;
#endif
#endif /* HAVE_X86ASM */
}

View File

@ -0,0 +1,889 @@
;
; Simple IDCT MMX
;
; Copyright (c) 2001, 2002 Michael Niedermayer <michaelni@gmx.at>
;
; Conversion from gcc syntax to x264asm syntax with minimal modifications
; by James Darnley <jdarnley@obe.tv>.
;
; This file is part of FFmpeg.
;
; FFmpeg is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public
; License as published by the Free Software Foundation; either
; version 2.1 of the License, or (at your option) any later version.
;
; FFmpeg is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with FFmpeg; if not, write to the Free Software
; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;/
%include "libavutil/x86/x86util.asm"
SECTION_RODATA
cextern pb_80
; Word mask that clears words 0 and 2 of a qword; DC_COND_IDCT applies it to
; the first input qword before testing for "everything else is zero".
wm1010: dw 0, 0xffff, 0, 0xffff
; Constant added to the low dword in the DC_COND_IDCT shortcut path.
d40000: dd 4 << 16, 0
; 23170.475006
; 22725.260826
; 21406.727617
; 19265.545870
; 16384.000000
; 12872.826198
; 8866.956905
; 4520.335430
%define C0 23170 ; cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
%define C1 22725 ; cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
%define C2 21407 ; cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
%define C3 19266 ; cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
%define C4 16383 ; cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
%define C5 12873 ; cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
%define C6 8867 ; cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
%define C7 4520 ; cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
%define ROW_SHIFT 11
%define COL_SHIFT 20 ; 6
; Table addressed by byte offset from the macros below.
;   +0  / +8 : two qwords added with paddd before the row-pass shift
;              (Z_COND_IDCT uses +0, DC_COND_IDCT uses +8)
;   +16 .. +104 : one qword (four words) of pmaddwd multipliers per row
coeffs:
dw 1 << (ROW_SHIFT - 1), 0
dw 1 << (ROW_SHIFT - 1), 0
dw 1 << (ROW_SHIFT - 1), 1
dw 1 << (ROW_SHIFT - 1), 0
dw C4, C4, C4, C4
dw C4, -C4, C4, -C4
dw C2, C6, C2, C6
dw C6, -C2, C6, -C2
dw C1, C3, C1, C3
dw C5, C7, C5, C7
dw C3, -C7, C3, -C7
dw -C1, -C5, -C1, -C5
dw C5, -C1, C5, -C1
dw C7, C3, C7, C3
dw C7, -C5, C7, -C5
dw C3, -C1, C3, -C1
SECTION .text
; DC_COND_IDCT in0, in1, in2, in3, dst, (unused), shift
; First pass over four qwords read from blockq + %1 .. blockq + %4; always
; writes four qwords to [%5], [8 + %5], [16 + %5] and [24 + %5].
; %6 is not referenced.  %7 is the arithmetic right shift applied to results.
; When every input word other than words 0 and 2 of the first qword is zero,
; the butterfly is skipped: the %%1 path shifts the first qword and stores the
; same packed result to all four outputs.
; Clobbers mm0-mm7 and t0d.
%macro DC_COND_IDCT 7
movq mm0, [blockq + %1] ; R4 R0 r4 r0
movq mm1, [blockq + %2] ; R6 R2 r6 r2
movq mm2, [blockq + %3] ; R3 R1 r3 r1
movq mm3, [blockq + %4] ; R7 R5 r7 r5
movq mm4, [wm1010]
pand mm4, mm0
por mm4, mm1
por mm4, mm2
por mm4, mm3
packssdw mm4, mm4
movd t0d, mm4
or t0d, t0d
jz %%1
movq mm4, [coeffs + 16] ; C4 C4 C4 C4
pmaddwd mm4, mm0 ; C4R4+C4R0 C4r4+C4r0
movq mm5, [coeffs + 24] ; -C4 C4 -C4 C4
pmaddwd mm0, mm5 ; -C4R4+C4R0 -C4r4+C4r0
movq mm5, [coeffs + 32] ; C6 C2 C6 C2
pmaddwd mm5, mm1 ; C6R6+C2R2 C6r6+C2r2
movq mm6, [coeffs + 40] ; -C2 C6 -C2 C6
pmaddwd mm1, mm6 ; -C2R6+C6R2 -C2r6+C6r2
movq mm7, [coeffs + 48] ; C3 C1 C3 C1
pmaddwd mm7, mm2 ; C3R3+C1R1 C3r3+C1r1
paddd mm4, [coeffs + 8]
movq mm6, mm4 ; C4R4+C4R0 C4r4+C4r0
paddd mm4, mm5 ; A0 a0
psubd mm6, mm5 ; A3 a3
movq mm5, [coeffs + 56] ; C7 C5 C7 C5
pmaddwd mm5, mm3 ; C7R7+C5R5 C7r7+C5r5
paddd mm0, [coeffs + 8]
paddd mm1, mm0 ; A1 a1
paddd mm0, mm0
psubd mm0, mm1 ; A2 a2
pmaddwd mm2, [coeffs + 64] ; -C7R3+C3R1 -C7r3+C3r1
paddd mm7, mm5 ; B0 b0
movq mm5, [coeffs + 72] ; -C5 -C1 -C5 -C1
pmaddwd mm5, mm3 ; -C5R7-C1R5 -C5r7-C1r5
paddd mm7, mm4 ; A0+B0 a0+b0
paddd mm4, mm4 ; 2A0 2a0
psubd mm4, mm7 ; A0-B0 a0-b0
paddd mm5, mm2 ; B1 b1
psrad mm7, %7
psrad mm4, %7
movq mm2, mm1 ; A1 a1
paddd mm1, mm5 ; A1+B1 a1+b1
psubd mm2, mm5 ; A1-B1 a1-b1
psrad mm1, %7
psrad mm2, %7
packssdw mm7, mm1 ; A1+B1 a1+b1 A0+B0 a0+b0
packssdw mm2, mm4 ; A0-B0 a0-b0 A1-B1 a1-b1
movq [%5], mm7
movq mm1, [blockq + %3] ; R3 R1 r3 r1
movq mm4, [coeffs + 80] ; -C1 C5 -C1 C5
movq [24 + %5], mm2
pmaddwd mm4, mm1 ; -C1R3+C5R1 -C1r3+C5r1
movq mm7, [coeffs + 88] ; C3 C7 C3 C7
pmaddwd mm1, [coeffs + 96] ; -C5R3+C7R1 -C5r3+C7r1
pmaddwd mm7, mm3 ; C3R7+C7R5 C3r7+C7r5
movq mm2, mm0 ; A2 a2
pmaddwd mm3, [coeffs + 104] ; -C1R7+C3R5 -C1r7+C3r5
paddd mm4, mm7 ; B2 b2
paddd mm2, mm4 ; A2+B2 a2+b2
psubd mm0, mm4 ; a2-B2 a2-b2
psrad mm2, %7
psrad mm0, %7
movq mm4, mm6 ; A3 a3
paddd mm3, mm1 ; B3 b3
paddd mm6, mm3 ; A3+B3 a3+b3
psubd mm4, mm3 ; a3-B3 a3-b3
psrad mm6, %7
packssdw mm2, mm6 ; A3+B3 a3+b3 A2+B2 a2+b2
movq [8 + %5], mm2
psrad mm4, %7
packssdw mm4, mm0 ; A2-B2 a2-b2 A3-B3 a3-b3
movq [16 + %5], mm4
jmp %%2
; Shortcut: mm0 still holds the unmodified first input qword here.
%%1:
pslld mm0, 16
paddd mm0, [d40000]
psrad mm0, 13
packssdw mm0, mm0
movq [%5], mm0
movq [8 + %5], mm0
movq [16 + %5], mm0
movq [24 + %5], mm0
%%2:
%endmacro
; Z_COND_IDCT in0, in1, in2, in3, dst, (unused), shift, zero_label
; Same first-pass butterfly as DC_COND_IDCT, except that:
;   - all words of all four input qwords are tested; if they are all zero the
;     macro jumps to %8 WITHOUT writing anything to %5;
;   - the paddd constant comes from [coeffs] instead of [coeffs + 8].
; %6 is not referenced.  Clobbers mm0-mm7 and t0d.
%macro Z_COND_IDCT 8
movq mm0, [blockq + %1] ; R4 R0 r4 r0
movq mm1, [blockq + %2] ; R6 R2 r6 r2
movq mm2, [blockq + %3] ; R3 R1 r3 r1
movq mm3, [blockq + %4] ; R7 R5 r7 r5
movq mm4, mm0
por mm4, mm1
por mm4, mm2
por mm4, mm3
packssdw mm4, mm4
movd t0d, mm4
or t0d, t0d
jz %8
movq mm4, [coeffs + 16] ; C4 C4 C4 C4
pmaddwd mm4, mm0 ; C4R4+C4R0 C4r4+C4r0
movq mm5, [coeffs + 24] ; -C4 C4 -C4 C4
pmaddwd mm0, mm5 ; -C4R4+C4R0 -C4r4+C4r0
movq mm5, [coeffs + 32] ; C6 C2 C6 C2
pmaddwd mm5, mm1 ; C6R6+C2R2 C6r6+C2r2
movq mm6, [coeffs + 40] ; -C2 C6 -C2 C6
pmaddwd mm1, mm6 ; -C2R6+C6R2 -C2r6+C6r2
movq mm7, [coeffs + 48] ; C3 C1 C3 C1
pmaddwd mm7, mm2 ; C3R3+C1R1 C3r3+C1r1
paddd mm4, [coeffs]
movq mm6, mm4 ; C4R4+C4R0 C4r4+C4r0
paddd mm4, mm5 ; A0 a0
psubd mm6, mm5 ; A3 a3
movq mm5, [coeffs + 56] ; C7 C5 C7 C5
pmaddwd mm5, mm3 ; C7R7+C5R5 C7r7+C5r5
paddd mm0, [coeffs]
paddd mm1, mm0 ; A1 a1
paddd mm0, mm0
psubd mm0, mm1 ; A2 a2
pmaddwd mm2, [coeffs + 64] ; -C7R3+C3R1 -C7r3+C3r1
paddd mm7, mm5 ; B0 b0
movq mm5, [coeffs + 72] ; -C5 -C1 -C5 -C1
pmaddwd mm5, mm3 ; -C5R7-C1R5 -C5r7-C1r5
paddd mm7, mm4 ; A0+B0 a0+b0
paddd mm4, mm4 ; 2A0 2a0
psubd mm4, mm7 ; A0-B0 a0-b0
paddd mm5, mm2 ; B1 b1
psrad mm7, %7
psrad mm4, %7
movq mm2, mm1 ; A1 a1
paddd mm1, mm5 ; A1+B1 a1+b1
psubd mm2, mm5 ; A1-B1 a1-b1
psrad mm1, %7
psrad mm2, %7
packssdw mm7, mm1 ; A1+B1 a1+b1 A0+B0 a0+b0
packssdw mm2, mm4 ; A0-B0 a0-b0 A1-B1 a1-b1
movq [%5], mm7
movq mm1, [blockq + %3] ; R3 R1 r3 r1
movq mm4, [coeffs + 80] ; -C1 C5 -C1 C5
movq [24 + %5], mm2
pmaddwd mm4, mm1 ; -C1R3+C5R1 -C1r3+C5r1
movq mm7, [coeffs + 88] ; C3 C7 C3 C7
pmaddwd mm1, [coeffs + 96] ; -C5R3+C7R1 -C5r3+C7r1
pmaddwd mm7, mm3 ; C3R7+C7R5 C3r7+C7r5
movq mm2, mm0 ; A2 a2
pmaddwd mm3, [coeffs + 104] ; -C1R7+C3R5 -C1r7+C3r5
paddd mm4, mm7 ; B2 b2
paddd mm2, mm4 ; A2+B2 a2+b2
psubd mm0, mm4 ; a2-B2 a2-b2
psrad mm2, %7
psrad mm0, %7
movq mm4, mm6 ; A3 a3
paddd mm3, mm1 ; B3 b3
paddd mm6, mm3 ; A3+B3 a3+b3
psubd mm4, mm3 ; a3-B3 a3-b3
psrad mm6, %7
packssdw mm2, mm6 ; A3+B3 a3+b3 A2+B2 a2+b2
movq [8 + %5], mm2
psrad mm4, %7
packssdw mm4, mm0 ; A2-B2 a2-b2 A3-B3 a3-b3
movq [16 + %5], mm4
%endmacro
; IDCT1 src0, src1, src2, src3, dst, shift
; Second pass, general case: all four source operands are read.
; %1..%4 are qword memory operands (brackets supplied by the caller); %3 is
; read twice.  Each invocation stores eight dwords (two packed words each) at
; [%5], [16 + %5], ..., [112 + %5].  %6 is the arithmetic right shift.
; Clobbers mm0-mm7.
%macro IDCT1 6
movq mm0, %1 ; R4 R0 r4 r0
movq mm1, %2 ; R6 R2 r6 r2
movq mm2, %3 ; R3 R1 r3 r1
movq mm3, %4 ; R7 R5 r7 r5
movq mm4, [coeffs + 16] ; C4 C4 C4 C4
pmaddwd mm4, mm0 ; C4R4+C4R0 C4r4+C4r0
movq mm5, [coeffs + 24] ; -C4 C4 -C4 C4
pmaddwd mm0, mm5 ; -C4R4+C4R0 -C4r4+C4r0
movq mm5, [coeffs + 32] ; C6 C2 C6 C2
pmaddwd mm5, mm1 ; C6R6+C2R2 C6r6+C2r2
movq mm6, [coeffs + 40] ; -C2 C6 -C2 C6
pmaddwd mm1, mm6 ; -C2R6+C6R2 -C2r6+C6r2
movq mm6, mm4 ; C4R4+C4R0 C4r4+C4r0
movq mm7, [coeffs + 48] ; C3 C1 C3 C1
pmaddwd mm7, mm2 ; C3R3+C1R1 C3r3+C1r1
paddd mm4, mm5 ; A0 a0
psubd mm6, mm5 ; A3 a3
movq mm5, mm0 ; -C4R4+C4R0 -C4r4+C4r0
paddd mm0, mm1 ; A1 a1
psubd mm5, mm1 ; A2 a2
movq mm1, [coeffs + 56] ; C7 C5 C7 C5
pmaddwd mm1, mm3 ; C7R7+C5R5 C7r7+C5r5
pmaddwd mm2, [coeffs + 64] ; -C7R3+C3R1 -C7r3+C3r1
paddd mm7, mm1 ; B0 b0
movq mm1, [coeffs + 72] ; -C5 -C1 -C5 -C1
pmaddwd mm1, mm3 ; -C5R7-C1R5 -C5r7-C1r5
paddd mm7, mm4 ; A0+B0 a0+b0
paddd mm4, mm4 ; 2A0 2a0
psubd mm4, mm7 ; A0-B0 a0-b0
paddd mm1, mm2 ; B1 b1
psrad mm7, %6
psrad mm4, %6
movq mm2, mm0 ; A1 a1
paddd mm0, mm1 ; A1+B1 a1+b1
psubd mm2, mm1 ; A1-B1 a1-b1
psrad mm0, %6
psrad mm2, %6
packssdw mm7, mm7 ; A0+B0 a0+b0
movd [%5], mm7
packssdw mm0, mm0 ; A1+B1 a1+b1
movd [16 + %5], mm0
packssdw mm2, mm2 ; A1-B1 a1-b1
movd [96 + %5], mm2
packssdw mm4, mm4 ; A0-B0 a0-b0
movd [112 + %5], mm4
movq mm0, %3 ; R3 R1 r3 r1
movq mm4, [coeffs + 80] ; -C1 C5 -C1 C5
pmaddwd mm4, mm0 ; -C1R3+C5R1 -C1r3+C5r1
movq mm7, [coeffs + 88] ; C3 C7 C3 C7
pmaddwd mm0, [coeffs + 96] ; -C5R3+C7R1 -C5r3+C7r1
pmaddwd mm7, mm3 ; C3R7+C7R5 C3r7+C7r5
movq mm2, mm5 ; A2 a2
pmaddwd mm3, [coeffs + 104] ; -C1R7+C3R5 -C1r7+C3r5
paddd mm4, mm7 ; B2 b2
paddd mm2, mm4 ; A2+B2 a2+b2
psubd mm5, mm4 ; a2-B2 a2-b2
psrad mm2, %6
psrad mm5, %6
movq mm4, mm6 ; A3 a3
paddd mm3, mm0 ; B3 b3
paddd mm6, mm3 ; A3+B3 a3+b3
psubd mm4, mm3 ; a3-B3 a3-b3
psrad mm6, %6
psrad mm4, %6
packssdw mm2, mm2 ; A2+B2 a2+b2
packssdw mm6, mm6 ; A3+B3 a3+b3
movd [32 + %5], mm2
packssdw mm4, mm4 ; A3-B3 a3-b3
packssdw mm5, mm5 ; A2-B2 a2-b2
movd [48 + %5], mm6
movd [64 + %5], mm4
movd [80 + %5], mm5
%endmacro
; IDCT2 src0, src1, src2, src3, dst, shift
; Second-pass variant that never reads %3 (its terms are treated as zero).
; The IDCT dispatcher uses it when the first pass skipped the group whose
; scratch slot is passed as %3, so that slot holds no valid data.
; Stores eight dwords at [%5], [16 + %5], ..., [112 + %5]; %6 is the shift.
; Clobbers mm0-mm7.
%macro IDCT2 6
movq mm0, %1 ; R4 R0 r4 r0
movq mm1, %2 ; R6 R2 r6 r2
movq mm3, %4 ; R7 R5 r7 r5
movq mm4, [coeffs + 16] ; C4 C4 C4 C4
pmaddwd mm4, mm0 ; C4R4+C4R0 C4r4+C4r0
movq mm5, [coeffs + 24] ; -C4 C4 -C4 C4
pmaddwd mm0, mm5 ; -C4R4+C4R0 -C4r4+C4r0
movq mm5, [coeffs + 32] ; C6 C2 C6 C2
pmaddwd mm5, mm1 ; C6R6+C2R2 C6r6+C2r2
movq mm6, [coeffs + 40] ; -C2 C6 -C2 C6
pmaddwd mm1, mm6 ; -C2R6+C6R2 -C2r6+C6r2
movq mm6, mm4 ; C4R4+C4R0 C4r4+C4r0
paddd mm4, mm5 ; A0 a0
psubd mm6, mm5 ; A3 a3
movq mm5, mm0 ; -C4R4+C4R0 -C4r4+C4r0
paddd mm0, mm1 ; A1 a1
psubd mm5, mm1 ; A2 a2
movq mm1, [coeffs + 56] ; C7 C5 C7 C5
pmaddwd mm1, mm3 ; C7R7+C5R5 C7r7+C5r5
movq mm7, [coeffs + 72] ; -C5 -C1 -C5 -C1
pmaddwd mm7, mm3 ; -C5R7-C1R5 -C5r7-C1r5
paddd mm1, mm4 ; A0+B0 a0+b0
paddd mm4, mm4 ; 2A0 2a0
psubd mm4, mm1 ; A0-B0 a0-b0
psrad mm1, %6
psrad mm4, %6
movq mm2, mm0 ; A1 a1
paddd mm0, mm7 ; A1+B1 a1+b1
psubd mm2, mm7 ; A1-B1 a1-b1
psrad mm0, %6
psrad mm2, %6
packssdw mm1, mm1 ; A0+B0 a0+b0
movd [%5], mm1
packssdw mm0, mm0 ; A1+B1 a1+b1
movd [16 + %5], mm0
packssdw mm2, mm2 ; A1-B1 a1-b1
movd [96 + %5], mm2
packssdw mm4, mm4 ; A0-B0 a0-b0
movd [112 + %5], mm4
movq mm1, [coeffs + 88] ; C3 C7 C3 C7
pmaddwd mm1, mm3 ; C3R7+C7R5 C3r7+C7r5
movq mm2, mm5 ; A2 a2
pmaddwd mm3, [coeffs + 104] ; -C1R7+C3R5 -C1r7+C3r5
paddd mm2, mm1 ; A2+B2 a2+b2
psubd mm5, mm1 ; a2-B2 a2-b2
psrad mm2, %6
psrad mm5, %6
movq mm1, mm6 ; A3 a3
paddd mm6, mm3 ; A3+B3 a3+b3
psubd mm1, mm3 ; a3-B3 a3-b3
psrad mm6, %6
psrad mm1, %6
packssdw mm2, mm2 ; A2+B2 a2+b2
packssdw mm6, mm6 ; A3+B3 a3+b3
movd [32 + %5], mm2
packssdw mm1, mm1 ; A3-B3 a3-b3
packssdw mm5, mm5 ; A2-B2 a2-b2
movd [48 + %5], mm6
movd [64 + %5], mm1
movd [80 + %5], mm5
%endmacro
; IDCT3 src0, src1, src2, src3, dst, shift
; Second-pass variant that reads only %1 and %4; %2 and %3 are never
; referenced (their terms are treated as zero).
; Stores eight dwords at [%5], [16 + %5], ..., [112 + %5]; %6 is the shift.
; Clobbers mm0-mm7.
%macro IDCT3 6
movq mm0, %1 ; R4 R0 r4 r0
movq mm3, %4 ; R7 R5 r7 r5
movq mm4, [coeffs + 16] ; C4 C4 C4 C4
pmaddwd mm4, mm0 ; C4R4+C4R0 C4r4+C4r0
movq mm5, [coeffs + 24] ; -C4 C4 -C4 C4
pmaddwd mm0, mm5 ; -C4R4+C4R0 -C4r4+C4r0
movq mm6, mm4 ; C4R4+C4R0 C4r4+C4r0
movq mm5, mm0 ; -C4R4+C4R0 -C4r4+C4r0
movq mm1, [coeffs + 56] ; C7 C5 C7 C5
pmaddwd mm1, mm3 ; C7R7+C5R5 C7r7+C5r5
movq mm7, [coeffs + 72] ; -C5 -C1 -C5 -C1
pmaddwd mm7, mm3 ; -C5R7-C1R5 -C5r7-C1r5
paddd mm1, mm4 ; A0+B0 a0+b0
paddd mm4, mm4 ; 2A0 2a0
psubd mm4, mm1 ; A0-B0 a0-b0
psrad mm1, %6
psrad mm4, %6
movq mm2, mm0 ; A1 a1
paddd mm0, mm7 ; A1+B1 a1+b1
psubd mm2, mm7 ; A1-B1 a1-b1
psrad mm0, %6
psrad mm2, %6
packssdw mm1, mm1 ; A0+B0 a0+b0
movd [%5], mm1
packssdw mm0, mm0 ; A1+B1 a1+b1
movd [16 + %5], mm0
packssdw mm2, mm2 ; A1-B1 a1-b1
movd [96 + %5], mm2
packssdw mm4, mm4 ; A0-B0 a0-b0
movd [112 + %5], mm4
movq mm1, [coeffs + 88] ; C3 C7 C3 C7
pmaddwd mm1, mm3 ; C3R7+C7R5 C3r7+C7r5
movq mm2, mm5 ; A2 a2
pmaddwd mm3, [coeffs + 104] ; -C1R7+C3R5 -C1r7+C3r5
paddd mm2, mm1 ; A2+B2 a2+b2
psubd mm5, mm1 ; a2-B2 a2-b2
psrad mm2, %6
psrad mm5, %6
movq mm1, mm6 ; A3 a3
paddd mm6, mm3 ; A3+B3 a3+b3
psubd mm1, mm3 ; a3-B3 a3-b3
psrad mm6, %6
psrad mm1, %6
packssdw mm2, mm2 ; A2+B2 a2+b2
packssdw mm6, mm6 ; A3+B3 a3+b3
movd [32 + %5], mm2
packssdw mm1, mm1 ; A3-B3 a3-b3
packssdw mm5, mm5 ; A2-B2 a2-b2
movd [48 + %5], mm6
movd [64 + %5], mm1
movd [80 + %5], mm5
%endmacro
; IDCT4 src0, src1, src2, src3, dst, shift
; Second-pass variant that never reads %2 (its terms are treated as zero);
; %1, %3 and %4 are read, %3 twice.
; Stores eight dwords at [%5], [16 + %5], ..., [112 + %5]; %6 is the shift.
; Clobbers mm0-mm7.
%macro IDCT4 6
movq mm0, %1 ; R4 R0 r4 r0
movq mm2, %3 ; R3 R1 r3 r1
movq mm3, %4 ; R7 R5 r7 r5
movq mm4, [coeffs + 16] ; C4 C4 C4 C4
pmaddwd mm4, mm0 ; C4R4+C4R0 C4r4+C4r0
movq mm5, [coeffs + 24] ; -C4 C4 -C4 C4
pmaddwd mm0, mm5 ; -C4R4+C4R0 -C4r4+C4r0
movq mm6, mm4 ; C4R4+C4R0 C4r4+C4r0
movq mm7, [coeffs + 48] ; C3 C1 C3 C1
pmaddwd mm7, mm2 ; C3R3+C1R1 C3r3+C1r1
movq mm5, mm0 ; -C4R4+C4R0 -C4r4+C4r0
movq mm1, [coeffs + 56] ; C7 C5 C7 C5
pmaddwd mm1, mm3 ; C7R7+C5R5 C7r7+C5r5
pmaddwd mm2, [coeffs + 64] ; -C7R3+C3R1 -C7r3+C3r1
paddd mm7, mm1 ; B0 b0
movq mm1, [coeffs + 72] ; -C5 -C1 -C5 -C1
pmaddwd mm1, mm3 ; -C5R7-C1R5 -C5r7-C1r5
paddd mm7, mm4 ; A0+B0 a0+b0
paddd mm4, mm4 ; 2A0 2a0
psubd mm4, mm7 ; A0-B0 a0-b0
paddd mm1, mm2 ; B1 b1
psrad mm7, %6
psrad mm4, %6
movq mm2, mm0 ; A1 a1
paddd mm0, mm1 ; A1+B1 a1+b1
psubd mm2, mm1 ; A1-B1 a1-b1
psrad mm0, %6
psrad mm2, %6
packssdw mm7, mm7 ; A0+B0 a0+b0
movd [%5], mm7
packssdw mm0, mm0 ; A1+B1 a1+b1
movd [16 + %5], mm0
packssdw mm2, mm2 ; A1-B1 a1-b1
movd [96 + %5], mm2
packssdw mm4, mm4 ; A0-B0 a0-b0
movd [112 + %5], mm4
movq mm0, %3 ; R3 R1 r3 r1
movq mm4, [coeffs + 80] ; -C1 C5 -C1 C5
pmaddwd mm4, mm0 ; -C1R3+C5R1 -C1r3+C5r1
movq mm7, [coeffs + 88] ; C3 C7 C3 C7
pmaddwd mm0, [coeffs + 96] ; -C5R3+C7R1 -C5r3+C7r1
pmaddwd mm7, mm3 ; C3R7+C7R5 C3r7+C7r5
movq mm2, mm5 ; A2 a2
pmaddwd mm3, [coeffs + 104] ; -C1R7+C3R5 -C1r7+C3r5
paddd mm4, mm7 ; B2 b2
paddd mm2, mm4 ; A2+B2 a2+b2
psubd mm5, mm4 ; a2-B2 a2-b2
psrad mm2, %6
psrad mm5, %6
movq mm4, mm6 ; A3 a3
paddd mm3, mm0 ; B3 b3
paddd mm6, mm3 ; A3+B3 a3+b3
psubd mm4, mm3 ; a3-B3 a3-b3
psrad mm6, %6
psrad mm4, %6
packssdw mm2, mm2 ; A2+B2 a2+b2
packssdw mm6, mm6 ; A3+B3 a3+b3
movd [32 + %5], mm2
packssdw mm4, mm4 ; A3-B3 a3-b3
packssdw mm5, mm5 ; A2-B2 a2-b2
movd [48 + %5], mm6
movd [64 + %5], mm4
movd [80 + %5], mm5
%endmacro
; IDCT5 src0, src1, src2, src3, dst, shift
; Second-pass variant that reads only %1 and %3; %2 and %4 are never
; referenced (their terms are treated as zero).
; Stores eight dwords at [%5], [16 + %5], ..., [112 + %5]; %6 is the shift.
; Clobbers mm0-mm7.
%macro IDCT5 6
movq mm0, %1 ; R4 R0 r4 r0
movq mm2, %3 ; R3 R1 r3 r1
movq mm4, [coeffs + 16] ; C4 C4 C4 C4
pmaddwd mm4, mm0 ; C4R4+C4R0 C4r4+C4r0
movq mm5, [coeffs + 24] ; -C4 C4 -C4 C4
pmaddwd mm0, mm5 ; -C4R4+C4R0 -C4r4+C4r0
movq mm6, mm4 ; C4R4+C4R0 C4r4+C4r0
movq mm7, [coeffs + 48] ; C3 C1 C3 C1
pmaddwd mm7, mm2 ; C3R3+C1R1 C3r3+C1r1
movq mm5, mm0 ; -C4R4+C4R0 -C4r4+C4r0
movq mm3, [coeffs + 64]
pmaddwd mm3, mm2 ; -C7R3+C3R1 -C7r3+C3r1
paddd mm7, mm4 ; A0+B0 a0+b0
paddd mm4, mm4 ; 2A0 2a0
psubd mm4, mm7 ; A0-B0 a0-b0
psrad mm7, %6
psrad mm4, %6
movq mm1, mm0 ; A1 a1
paddd mm0, mm3 ; A1+B1 a1+b1
psubd mm1, mm3 ; A1-B1 a1-b1
psrad mm0, %6
psrad mm1, %6
packssdw mm7, mm7 ; A0+B0 a0+b0
movd [%5], mm7
packssdw mm0, mm0 ; A1+B1 a1+b1
movd [16 + %5], mm0
packssdw mm1, mm1 ; A1-B1 a1-b1
movd [96 + %5], mm1
packssdw mm4, mm4 ; A0-B0 a0-b0
movd [112 + %5], mm4
movq mm4, [coeffs + 80] ; -C1 C5 -C1 C5
pmaddwd mm4, mm2 ; -C1R3+C5R1 -C1r3+C5r1
pmaddwd mm2, [coeffs + 96] ; -C5R3+C7R1 -C5r3+C7r1
movq mm1, mm5 ; A2 a2
paddd mm1, mm4 ; A2+B2 a2+b2
psubd mm5, mm4 ; a2-B2 a2-b2
psrad mm1, %6
psrad mm5, %6
movq mm4, mm6 ; A3 a3
paddd mm6, mm2 ; A3+B3 a3+b3
psubd mm4, mm2 ; a3-B3 a3-b3
psrad mm6, %6
psrad mm4, %6
packssdw mm1, mm1 ; A2+B2 a2+b2
packssdw mm6, mm6 ; A3+B3 a3+b3
movd [32 + %5], mm1
packssdw mm4, mm4 ; A3-B3 a3-b3
packssdw mm5, mm5 ; A2-B2 a2-b2
movd [48 + %5], mm6
movd [64 + %5], mm4
movd [80 + %5], mm5
%endmacro
; IDCT6 src0, src1, src2, src3, dst, shift
; Second-pass variant that reads only %1 and %2; %3 and %4 are never
; referenced.  Unlike IDCT1-IDCT5 and IDCT7, %1 and %2 are ADDRESS expressions
; (the macro adds the brackets) and two adjacent qwords are read from each
; ([%1], [8 + %1], [%2], [8 + %2]), so one invocation stores eight full
; qwords at [%5], [16 + %5], ..., [112 + %5].
; Outputs are mirrored: offsets 0/112, 16/96, 32/80 and 48/64 receive the
; same value.  %6 is the shift.  Clobbers mm0-mm7.
%macro IDCT6 6
movq mm0, [%1] ; R4 R0 r4 r0
movq mm1, [%2] ; R6 R2 r6 r2
movq mm4, [coeffs + 16] ; C4 C4 C4 C4
pmaddwd mm4, mm0 ; C4R4+C4R0 C4r4+C4r0
movq mm5, [coeffs + 24] ; -C4 C4 -C4 C4
pmaddwd mm0, mm5 ; -C4R4+C4R0 -C4r4+C4r0
movq mm5, [coeffs + 32] ; C6 C2 C6 C2
pmaddwd mm5, mm1 ; C6R6+C2R2 C6r6+C2r2
movq mm6, [coeffs + 40] ; -C2 C6 -C2 C6
pmaddwd mm1, mm6 ; -C2R6+C6R2 -C2r6+C6r2
movq mm6, mm4 ; C4R4+C4R0 C4r4+C4r0
paddd mm4, mm5 ; A0 a0
psubd mm6, mm5 ; A3 a3
movq mm5, mm0 ; -C4R4+C4R0 -C4r4+C4r0
paddd mm0, mm1 ; A1 a1
psubd mm5, mm1 ; A2 a2
movq mm2, [8 + %1] ; R4 R0 r4 r0
movq mm3, [8 + %2] ; R6 R2 r6 r2
movq mm1, [coeffs + 16] ; C4 C4 C4 C4
pmaddwd mm1, mm2 ; C4R4+C4R0 C4r4+C4r0
movq mm7, [coeffs + 24] ; -C4 C4 -C4 C4
pmaddwd mm2, mm7 ; -C4R4+C4R0 -C4r4+C4r0
movq mm7, [coeffs + 32] ; C6 C2 C6 C2
pmaddwd mm7, mm3 ; C6R6+C2R2 C6r6+C2r2
pmaddwd mm3, [coeffs + 40] ; -C2R6+C6R2 -C2r6+C6r2
paddd mm7, mm1 ; A0 a0
paddd mm1, mm1 ; 2C0 2c0
psubd mm1, mm7 ; A3 a3
paddd mm3, mm2 ; A1 a1
paddd mm2, mm2 ; 2C1 2c1
psubd mm2, mm3 ; A2 a2
psrad mm4, %6
psrad mm7, %6
psrad mm3, %6
packssdw mm4, mm7 ; A0 a0
movq [%5], mm4
psrad mm0, %6
packssdw mm0, mm3 ; A1 a1
movq [16 + %5], mm0
movq [96 + %5], mm0
movq [112 + %5], mm4
psrad mm5, %6
psrad mm6, %6
psrad mm2, %6
packssdw mm5, mm2 ; A2-B2 a2-b2
movq [32 + %5], mm5
psrad mm1, %6
packssdw mm6, mm1 ; A3+B3 a3+b3
movq [48 + %5], mm6
movq [64 + %5], mm6
movq [80 + %5], mm5
%endmacro
; IDCT7 src0, src1, src2, src3, dst, shift
; Second-pass variant that never reads %4 (its terms are treated as zero);
; %1, %2 and %3 are read as qword memory operands.
; Stores eight dwords at [%5], [16 + %5], ..., [112 + %5]; %6 is the shift.
; Clobbers mm0-mm7.
%macro IDCT7 6
movq mm0, %1 ; R4 R0 r4 r0
movq mm1, %2 ; R6 R2 r6 r2
movq mm2, %3 ; R3 R1 r3 r1
movq mm4, [coeffs + 16] ; C4 C4 C4 C4
pmaddwd mm4, mm0 ; C4R4+C4R0 C4r4+C4r0
movq mm5, [coeffs + 24] ; -C4 C4 -C4 C4
pmaddwd mm0, mm5 ; -C4R4+C4R0 -C4r4+C4r0
movq mm5, [coeffs + 32] ; C6 C2 C6 C2
pmaddwd mm5, mm1 ; C6R6+C2R2 C6r6+C2r2
movq mm6, [coeffs + 40] ; -C2 C6 -C2 C6
pmaddwd mm1, mm6 ; -C2R6+C6R2 -C2r6+C6r2
movq mm6, mm4 ; C4R4+C4R0 C4r4+C4r0
movq mm7, [coeffs + 48] ; C3 C1 C3 C1
pmaddwd mm7, mm2 ; C3R3+C1R1 C3r3+C1r1
paddd mm4, mm5 ; A0 a0
psubd mm6, mm5 ; A3 a3
movq mm5, mm0 ; -C4R4+C4R0 -C4r4+C4r0
paddd mm0, mm1 ; A1 a1
psubd mm5, mm1 ; A2 a2
movq mm1, [coeffs + 64]
pmaddwd mm1, mm2 ; -C7R3+C3R1 -C7r3+C3r1
paddd mm7, mm4 ; A0+B0 a0+b0
paddd mm4, mm4 ; 2A0 2a0
psubd mm4, mm7 ; A0-B0 a0-b0
psrad mm7, %6
psrad mm4, %6
movq mm3, mm0 ; A1 a1
paddd mm0, mm1 ; A1+B1 a1+b1
psubd mm3, mm1 ; A1-B1 a1-b1
psrad mm0, %6
psrad mm3, %6
packssdw mm7, mm7 ; A0+B0 a0+b0
movd [%5], mm7
packssdw mm0, mm0 ; A1+B1 a1+b1
movd [16 + %5], mm0
packssdw mm3, mm3 ; A1-B1 a1-b1
movd [96 + %5], mm3
packssdw mm4, mm4 ; A0-B0 a0-b0
movd [112 + %5], mm4
movq mm4, [coeffs + 80] ; -C1 C5 -C1 C5
pmaddwd mm4, mm2 ; -C1R3+C5R1 -C1r3+C5r1
pmaddwd mm2, [coeffs + 96] ; -C5R3+C7R1 -C5r3+C7r1
movq mm3, mm5 ; A2 a2
paddd mm3, mm4 ; A2+B2 a2+b2
psubd mm5, mm4 ; a2-B2 a2-b2
psrad mm3, %6
psrad mm5, %6
movq mm4, mm6 ; A3 a3
paddd mm6, mm2 ; A3+B3 a3+b3
psubd mm4, mm2 ; a3-B3 a3-b3
psrad mm6, %6
packssdw mm3, mm3 ; A2+B2 a2+b2
movd [32 + %5], mm3
psrad mm4, %6
packssdw mm6, mm6 ; A3+B3 a3+b3
movd [48 + %5], mm6
packssdw mm4, mm4 ; A3-B3 a3-b3
packssdw mm5, mm5 ; A2-B2 a2-b2
movd [64 + %5], mm4
movd [80 + %5], mm5
%endmacro
; IDCT8 src0, src1, src2, src3, dst, shift
; Second-pass variant that reads only %1; %2, %3 and %4 are never referenced.
; %1 is an ADDRESS expression (the macro adds the brackets) and two adjacent
; qwords, [%1] and [8 + %1], are read.  Only two distinct packed results are
; produced: one is stored at offsets 0, 48, 64 and 112 of %5, the other at
; offsets 16, 32, 80 and 96.  %6 is the shift.
; Clobbers mm0, mm1, mm2, mm4, mm5 and mm7.
%macro IDCT8 6
movq mm0, [%1] ; R4 R0 r4 r0
movq mm4, [coeffs + 16] ; C4 C4 C4 C4
pmaddwd mm4, mm0 ; C4R4+C4R0 C4r4+C4r0
movq mm5, [coeffs + 24] ; -C4 C4 -C4 C4
pmaddwd mm0, mm5 ; -C4R4+C4R0 -C4r4+C4r0
psrad mm4, %6
psrad mm0, %6
movq mm2, [8 + %1] ; R4 R0 r4 r0
movq mm1, [coeffs + 16] ; C4 C4 C4 C4
pmaddwd mm1, mm2 ; C4R4+C4R0 C4r4+C4r0
movq mm7, [coeffs + 24] ; -C4 C4 -C4 C4
pmaddwd mm2, mm7 ; -C4R4+C4R0 -C4r4+C4r0
; NOTE(review): the value loaded into mm7 on the next line is not read again
; inside this macro.
movq mm7, [coeffs + 32] ; C6 C2 C6 C2
psrad mm1, %6
packssdw mm4, mm1 ; A0 a0
movq [%5], mm4
psrad mm2, %6
packssdw mm0, mm2 ; A1 a1
movq [16 + %5], mm0
movq [96 + %5], mm0
movq [112 + %5], mm4
movq [32 + %5], mm0
movq [48 + %5], mm4
movq [64 + %5], mm4
movq [80 + %5], mm0
%endmacro
; IDCT: complete in-place transform of the 128 bytes at blockq, using the 128
; bytes at rsp + 0 .. rsp + 127 as scratch between the two passes.
;
; First pass (shift 11): the input is handled as four 32-byte groups at
; blockq + 0, 32, 64 and 96, each written to the scratch slot at the same
; offset from rsp.  Group 0 always goes through DC_COND_IDCT; groups 1-3 go
; through Z_COND_IDCT, which writes nothing and branches when its group is
; entirely zero.
;
; Second pass (shift 20): the label reached encodes which of groups 1-3 were
; zero, and selects a variant that never reads the unwritten scratch slots:
;   fall-through : none zero       -> IDCT1
;   %%4          : group 1         -> IDCT2
;   %%6          : groups 1, 2     -> IDCT3
;   %%2          : group 2         -> IDCT4
;   %%3          : groups 2, 3     -> IDCT5
;   %%5          : groups 1, 3     -> IDCT6
;   %%1          : group 3         -> IDCT7
;   %%7          : groups 1, 2, 3  -> IDCT8
; Note the second-pass argument order: scratch slots are passed as
; rsp + 0, rsp + 64, rsp + 32, rsp + 96.
; IDCT6 and IDCT8 cover 8 bytes of each output row per invocation and are
; therefore invoked twice; the other variants cover 4 bytes and run 4 times.
%macro IDCT 0
DC_COND_IDCT 0, 8, 16, 24, rsp + 0, null, 11
Z_COND_IDCT 32, 40, 48, 56, rsp + 32, null, 11, %%4
Z_COND_IDCT 64, 72, 80, 88, rsp + 64, null, 11, %%2
Z_COND_IDCT 96, 104, 112, 120, rsp + 96, null, 11, %%1
IDCT1 [rsp + 0], [rsp + 64], [rsp + 32], [rsp + 96], blockq + 0, 20
IDCT1 [rsp + 8], [rsp + 72], [rsp + 40], [rsp + 104], blockq + 4, 20
IDCT1 [rsp + 16], [rsp + 80], [rsp + 48], [rsp + 112], blockq + 8, 20
IDCT1 [rsp + 24], [rsp + 88], [rsp + 56], [rsp + 120], blockq + 12, 20
jmp %%9
ALIGN 16
%%4:
Z_COND_IDCT 64, 72, 80, 88, rsp + 64, null, 11, %%6
Z_COND_IDCT 96, 104, 112, 120, rsp + 96, null, 11, %%5
IDCT2 [rsp + 0], [rsp + 64], [rsp + 32], [rsp + 96], blockq + 0, 20
IDCT2 [rsp + 8], [rsp + 72], [rsp + 40], [rsp + 104], blockq + 4, 20
IDCT2 [rsp + 16], [rsp + 80], [rsp + 48], [rsp + 112], blockq + 8, 20
IDCT2 [rsp + 24], [rsp + 88], [rsp + 56], [rsp + 120], blockq + 12, 20
jmp %%9
ALIGN 16
%%6:
Z_COND_IDCT 96, 104, 112, 120, rsp + 96, null, 11, %%7
IDCT3 [rsp + 0], [rsp + 64], [rsp + 32], [rsp + 96], blockq + 0, 20
IDCT3 [rsp + 8], [rsp + 72], [rsp + 40], [rsp + 104], blockq + 4, 20
IDCT3 [rsp + 16], [rsp + 80], [rsp + 48], [rsp + 112], blockq + 8, 20
IDCT3 [rsp + 24], [rsp + 88], [rsp + 56], [rsp + 120], blockq + 12, 20
jmp %%9
ALIGN 16
%%2:
Z_COND_IDCT 96, 104, 112, 120, rsp + 96, null, 11, %%3
IDCT4 [rsp + 0], [rsp + 64], [rsp + 32], [rsp + 96], blockq + 0, 20
IDCT4 [rsp + 8], [rsp + 72], [rsp + 40], [rsp + 104], blockq + 4, 20
IDCT4 [rsp + 16], [rsp + 80], [rsp + 48], [rsp + 112], blockq + 8, 20
IDCT4 [rsp + 24], [rsp + 88], [rsp + 56], [rsp + 120], blockq + 12, 20
jmp %%9
ALIGN 16
%%3:
IDCT5 [rsp + 0], [rsp + 64], [rsp + 32], [rsp + 96], blockq + 0, 20
IDCT5 [rsp + 8], [rsp + 72], [rsp + 40], [rsp + 104], blockq + 4, 20
IDCT5 [rsp + 16], [rsp + 80], [rsp + 48], [rsp + 112], blockq + 8, 20
IDCT5 [rsp + 24], [rsp + 88], [rsp + 56], [rsp + 120], blockq + 12, 20
jmp %%9
ALIGN 16
%%5:
IDCT6 rsp + 0, rsp + 64, rsp + 32, rsp + 96, blockq + 0, 20
IDCT6 rsp + 16, rsp + 80, rsp + 48, rsp + 112, blockq + 8, 20
jmp %%9
ALIGN 16
%%1:
IDCT7 [rsp + 0], [rsp + 64], [rsp + 32], [rsp + 96], blockq + 0, 20
IDCT7 [rsp + 8], [rsp + 72], [rsp + 40], [rsp + 104], blockq + 4, 20
IDCT7 [rsp + 16], [rsp + 80], [rsp + 48], [rsp + 112], blockq + 8, 20
IDCT7 [rsp + 24], [rsp + 88], [rsp + 56], [rsp + 120], blockq + 12, 20
jmp %%9
ALIGN 16
%%7:
IDCT8 rsp + 0, rsp + 64, rsp + 32, rsp + 96, blockq + 0, 20
IDCT8 rsp + 16, rsp + 80, rsp + 48, rsp + 112, blockq + 8, 20
%%9:
%endmacro
; PUT_PIXELS_CLAMPED_HALF block_offset
; Convert 64 bytes of 16-bit values at blockq + %1 to unsigned-saturated
; bytes (packuswb) and store them as four 8-byte rows at pixelsq,
; pixelsq + lsizeq, pixelsq + 2*lsizeq and pixelsq + lsize3q.
; With 8-byte registers each row is packed in its own register; with 16-byte
; registers one register carries two rows (low half via movq, high half via
; movhps).  lsize3q must already hold the offset of the fourth row.
%macro PUT_PIXELS_CLAMPED_HALF 1
mova m0, [blockq+mmsize*0+%1]
mova m1, [blockq+mmsize*2+%1]
%if mmsize == 8
mova m2, [blockq+mmsize*4+%1]
mova m3, [blockq+mmsize*6+%1]
%endif
packuswb m0, [blockq+mmsize*1+%1]
packuswb m1, [blockq+mmsize*3+%1]
%if mmsize == 8
packuswb m2, [blockq+mmsize*5+%1]
packuswb m3, [blockq+mmsize*7+%1]
movq [pixelsq], m0
movq [lsizeq+pixelsq], m1
movq [2*lsizeq+pixelsq], m2
movq [lsize3q+pixelsq], m3
%else
movq [pixelsq], m0
movhps [lsizeq+pixelsq], m0
movq [2*lsizeq+pixelsq], m1
movhps [lsize3q+pixelsq], m1
%endif
%endmacro
; ADD_PIXELS_CLAMPED block_offset
; Add 32 bytes of 16-bit values at blockq + %1 to two 8-byte pixel rows at
; pixelsq and pixelsq + lsizeq: the pixels are widened to words, added with
; signed saturation (paddsw), packed back with unsigned saturation
; (packuswb) and stored over the original pixels.
; m4 is used as the zero operand of the byte->word unpack, so the caller must
; clear it first.
%macro ADD_PIXELS_CLAMPED 1
mova m0, [blockq+mmsize*0+%1]
mova m1, [blockq+mmsize*1+%1]
%if mmsize == 8
mova m5, [blockq+mmsize*2+%1]
mova m6, [blockq+mmsize*3+%1]
%endif
movq m2, [pixelsq]
movq m3, [pixelsq+lsizeq]
%if mmsize == 8
mova m7, m2
punpcklbw m2, m4
punpckhbw m7, m4
paddsw m0, m2
paddsw m1, m7
mova m7, m3
punpcklbw m3, m4
punpckhbw m7, m4
paddsw m5, m3
paddsw m6, m7
%else
punpcklbw m2, m4
punpcklbw m3, m4
paddsw m0, m2
paddsw m1, m3
%endif
packuswb m0, m1
%if mmsize == 8
packuswb m5, m6
movq [pixelsq], m0
movq [pixelsq+lsizeq], m5
%else
movq [pixelsq], m0
movhps [pixelsq+lsizeq], m0
%endif
%endmacro
INIT_MMX mmx
; simple_idct (MMX): transform the coefficient block at `block` in place.
; The 128 in the cglobal line is presumably the stack reservation that the
; IDCT macro addresses as rsp + 0 .. rsp + 127 -- confirm against x86inc.
cglobal simple_idct, 1, 2, 8, 128, block, t0
IDCT
RET
; simple_idct_put (MMX): run IDCT on `block`, then store the result as eight
; rows of saturated bytes at `pixels` with a row pitch of `lsize`.  The rows
; are written in two groups of four; the second group reads from block
; offset 64.
cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0
IDCT
lea lsize3q, [lsizeq*3]
PUT_PIXELS_CLAMPED_HALF 0
lea pixelsq, [pixelsq+lsizeq*4]
PUT_PIXELS_CLAMPED_HALF 64
RET
; simple_idct_add (MMX): run IDCT on `block`, then add the result to the
; eight pixel rows at `pixels` (row pitch `lsize`) with saturation.
; Rows are processed two at a time; m4 is zeroed once up front because
; ADD_PIXELS_CLAMPED uses it as its zero register.
cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0
IDCT
pxor m4, m4
ADD_PIXELS_CLAMPED 0
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 32
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 64
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 96
RET
INIT_XMM sse2
; simple_idct_put (SSE2): same sequence as the MMX version.  The IDCT macro
; itself is written with explicit mm registers; only the final clamp/store
; macro is expanded with 16-byte registers here.
cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0
IDCT
lea lsize3q, [lsizeq*3]
PUT_PIXELS_CLAMPED_HALF 0
lea pixelsq, [pixelsq+lsizeq*4]
PUT_PIXELS_CLAMPED_HALF 64
RET
; simple_idct_add (SSE2): same sequence as the MMX version, with the
; add/clamp/store macro expanded for 16-byte registers.  m4 is zeroed once
; because ADD_PIXELS_CLAMPED uses it as its zero register.
cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0
IDCT
pxor m4, m4
ADD_PIXELS_CLAMPED 0
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 32
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 64
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 96
RET

View File

@ -0,0 +1,53 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_X86_SIMPLE_IDCT_H
#define AVCODEC_X86_SIMPLE_IDCT_H
#include <stddef.h>
#include <stdint.h>
/*
 * C prototypes for the x86 assembly "simple" IDCT routines.
 *
 * Naming: the trailing _mmx / _sse2 / _avx names the instruction set the
 * routine was assembled for.
 *
 * Arguments:
 *   block     - 8x8 block of 16-bit coefficients. The functions that take
 *               only block have no other output argument; the idct8/10/12
 *               assembly stores its result back into block.
 *   dest      - output pixels for the _put / _add variants.
 *   line_size - row pitch of dest; the assembly adds it directly to the
 *               dest pointer to step from one row to the next.
 *
 * _put variants write the clamped IDCT result to dest; _add variants add the
 * IDCT result to the pixels already in dest.
 */
/* MMX versions. */
void ff_simple_idct_mmx(int16_t *block);
void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
/* SSE2 versions of the put/add functions above (no block-only variant). */
void ff_simple_idct_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
/*
 * idct8 / idct10 / idct12 families. The assembly defining them is guarded by
 * ARCH_X86_64, and the _avx instances additionally by HAVE_AVX_EXTERNAL, so
 * callers must apply the same conditions before referencing these symbols.
 * NOTE(review): the numeric suffix appears to be the sample bit depth (the
 * 10 and 12 put functions clamp to 1023 and 4095) -- confirm with callers.
 */
void ff_simple_idct8_sse2(int16_t *block);
void ff_simple_idct8_avx(int16_t *block);
void ff_simple_idct8_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct8_put_avx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct8_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct8_add_avx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
/* The 10 and 12 families provide block-only and put variants, no add. */
void ff_simple_idct10_sse2(int16_t *block);
void ff_simple_idct10_avx(int16_t *block);
void ff_simple_idct10_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct10_put_avx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct12_sse2(int16_t *block);
void ff_simple_idct12_avx(int16_t *block);
void ff_simple_idct12_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct12_put_avx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
#endif /* AVCODEC_X86_SIMPLE_IDCT_H */

View File

@ -0,0 +1,205 @@
;******************************************************************************
;* x86-SIMD-optimized IDCT for prores
;* this is identical to "simple" IDCT written by Michael Niedermayer
;* except for the clip range
;*
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
;* Copyright (c) 2015 Christophe Gisquet
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
%if ARCH_X86_64
SECTION_RODATA
; Word constants defined in another object file. In this file pw_2 and pw_32
; are passed to IDCT_FN as bias operands, and pw_1023 / pw_4095 as the upper
; clamp of the 10- and 12-bit put functions. pw_16 is not referenced by the
; code visible in this file.
cextern pw_2
cextern pw_16
cextern pw_32
cextern pw_1023
cextern pw_4095
; Rounding terms: four dwords holding 1 << (N-1), i.e. half of 1 << N.
; IDCT_1D builds the label from its shift argument (pd_round_<shift>) and adds
; the value to every dword sum when it is given a string instead of a bias.
pd_round_11: times 4 dd 1<<(11-1)
pd_round_12: times 4 dd 1<<(12-1)
pd_round_15: times 4 dd 1<<(15-1)
pd_round_19: times 4 dd 1<<(19-1)
pd_round_20: times 4 dd 1<<(20-1)
; Declare a constant table made of one word pair repeated four times.
; %1 = label, %2 = first word of the pair, %3 = second word of the pair
; The result is 8 words (16 bytes) laid out as %2, %3, %2, %3, ..., the
; operand layout pmaddwd needs to multiply interleaved word pairs.
%macro CONST_DEC 3
const %1
times 4 dw %2, %3
%endmacro
; IDCT weights divided by 4 (the "sh2" suffix), with the remainder relative to
; the full-precision weight noted on each line. Presumably scaled down so that
; every value fits the signed 16-bit operands of pmaddwd.
%define W1sh2 22725 ; W1 = 90901 = 22725<<2 + 1
%define W2sh2 21407 ; W2 = 85627 = 21407<<2 - 1
%define W3sh2 19265 ; W3 = 77062 = 19265<<2 + 2
%define W4sh2 16384 ; W4 = 65535 = 16384<<2 - 1
; Alternative W3/W4 values used by the _lo tables below.
%define W3sh2_lo 19266
%define W4sh2_lo 16383
%define W5sh2 12873 ; W5 = 51491 = 12873<<2 - 1
%define W6sh2 8867 ; W6 = 35468 = 8867<<2
%define W7sh2 4520 ; W7 = 18081 = 4520<<2 + 1
; Tables with the _hi suffix use W3sh2/W4sh2. define_constants _hi maps the
; unsuffixed names used by IDCT_1D onto them (idct10/idct12 functions).
CONST_DEC w4_plus_w2_hi, W4sh2, +W2sh2
CONST_DEC w4_min_w2_hi, W4sh2, -W2sh2
CONST_DEC w4_plus_w6_hi, W4sh2, +W6sh2
CONST_DEC w4_min_w6_hi, W4sh2, -W6sh2
CONST_DEC w1_plus_w3_hi, W1sh2, +W3sh2
CONST_DEC w3_min_w1_hi, W3sh2, -W1sh2
CONST_DEC w7_plus_w3_hi, W7sh2, +W3sh2
CONST_DEC w3_min_w7_hi, W3sh2, -W7sh2
; Tables without a suffix involve neither W3 nor W4 and are shared by both
; precisions.
CONST_DEC w1_plus_w5, W1sh2, +W5sh2
CONST_DEC w5_min_w1, W5sh2, -W1sh2
CONST_DEC w5_plus_w7, W5sh2, +W7sh2
CONST_DEC w7_min_w5, W7sh2, -W5sh2
; Tables with the _lo suffix use W3sh2_lo/W4sh2_lo. define_constants _lo
; selects them (idct8 functions).
CONST_DEC w4_plus_w2_lo, W4sh2_lo, +W2sh2
CONST_DEC w4_min_w2_lo, W4sh2_lo, -W2sh2
CONST_DEC w4_plus_w6_lo, W4sh2_lo, +W6sh2
CONST_DEC w4_min_w6_lo, W4sh2_lo, -W6sh2
CONST_DEC w1_plus_w3_lo, W1sh2, +W3sh2_lo
CONST_DEC w3_min_w1_lo, W3sh2_lo, -W1sh2
CONST_DEC w7_plus_w3_lo, W7sh2, +W3sh2_lo
CONST_DEC w3_min_w7_lo, W3sh2_lo, -W7sh2
%include "libavcodec/x86/simple_idct10_template.asm"
SECTION .text
; Store four 16-byte registers as eight 8-byte rows.
; %1-%8 = destination memory operands for rows 0-7
; %9-%12 = source registers, each holding two packed rows
; The low 8 bytes of each source go to the even-numbered destination (movq)
; and the high 8 bytes to the following odd-numbered one (movhps).
%macro STORE_HI_LO 12
movq %1, %9
movq %3, %10
movq %5, %11
movq %7, %12
movhps %2, %9
movhps %4, %10
movhps %6, %11
movhps %8, %12
%endmacro
; Load eight rows of bytes and zero-extend each to words in one step.
; %1-%8 = destination registers, %9-%16 = source memory operands
; Uses pmovzxbw, so callers only invoke it under cpuflag(sse4).
%macro LOAD_ZXBW_8 16
pmovzxbw %1, %9
pmovzxbw %2, %10
pmovzxbw %3, %11
pmovzxbw %4, %12
pmovzxbw %5, %13
pmovzxbw %6, %14
pmovzxbw %7, %15
pmovzxbw %8, %16
%endmacro
; Load four rows of bytes and widen them to words without pmovzxbw.
; %1-%4 = destination registers, %5-%8 = source memory operands
; %9 = register the bytes are interleaved with; the caller in this file
;      clears it with pxor first, which makes the unpack a zero-extension
%macro LOAD_ZXBW_4 9
movh %1, %5
movh %2, %6
movh %3, %7
movh %4, %8
punpcklbw %1, %9
punpcklbw %2, %9
punpcklbw %3, %9
punpcklbw %4, %9
%endmacro
; Expand to the memory operands of four consecutive rows starting at base:
; rows 0, 1 and 2 are addressed through stride, row 3 through stride3, which
; the callers in this file set to 3 * stride.
%define PASS4ROWS(base, stride, stride3) \
[base], [base + stride], [base + 2*stride], [base + stride3]
; Emit every idct8 / idct10 / idct12 entry point for the instruction set that
; is active when the macro is invoked (see the INIT_XMM lines after it).
; After IDCT_FN the eight word rows 0..7 are held in
; m8, m0, m1, m2, m4, m11, m9, m10 (same order as its "store" action).
%macro idct_fn 0
; The idct8 functions use the _lo coefficient tables (W3sh2_lo / W4sh2_lo).
define_constants _lo
; simple_idct8(block): IDCT with the result stored back into block.
; The 32 bytes of stack requested here are used by the DC-only handling that
; IDCT_FN enables for a row shift of 11 ([rsp] and [rsp+16]).
cglobal simple_idct8, 1, 1, 16, 32, block
IDCT_FN "", 11, pw_32, 20, "store"
RET
; simple_idct8_put(pixels, lsize, block): IDCT, then store the rows as
; unsigned-saturated bytes.
cglobal simple_idct8_put, 3, 4, 16, 32, pixels, lsize, block
IDCT_FN "", 11, pw_32, 20
; r3 = 3*lsize, r2 = pixels + 3*lsize. r2 is overwritten only after IDCT_FN
; has finished; NOTE(review): r2 should be the register of the third
; argument (block) under the x86inc convention -- confirm.
lea r3, [3*lsizeq]
lea r2, [pixelsq + r3]
; Pack two word rows into each register: rows 0/1, 2/3, 4/5 and 6/7.
packuswb m8, m0
packuswb m1, m2
packuswb m4, m11
packuswb m9, m10
; PASS8ROWS comes from the included x86util.asm; presumably it expands to the
; eight row addresses built from base, base + 3*stride, stride and 3*stride.
STORE_HI_LO PASS8ROWS(pixelsq, r2, lsizeq, r3), m8, m1, m4, m9
RET
; simple_idct8_add(pixels, lsize, block): IDCT, add the result to the pixels
; already in the destination with signed saturation, then store as
; unsigned-saturated bytes.
cglobal simple_idct8_add, 3, 4, 16, 32, pixels, lsize, block
IDCT_FN "", 11, pw_32, 20
; r2 = 3*lsize (overwritten only after IDCT_FN has finished).
lea r2, [3*lsizeq]
%if cpuflag(sse4)
; SSE4 path: widen all eight destination rows at once with pmovzxbw.
; r3 = pixels + 3*lsize, also reused by the final store.
lea r3, [pixelsq + r2]
LOAD_ZXBW_8 m3, m5, m6, m7, m12, m13, m14, m15, PASS8ROWS(pixelsq, r3, lsizeq, r2)
paddsw m8, m3
paddsw m0, m5
paddsw m1, m6
paddsw m2, m7
paddsw m4, m12
paddsw m11, m13
paddsw m9, m14
paddsw m10, m15
%else
; Fallback path: four rows at a time, widening against the zero in m12.
pxor m12, m12
LOAD_ZXBW_4 m3, m5, m6, m7, PASS4ROWS(pixelsq, lsizeq, r2), m12
paddsw m8, m3
paddsw m0, m5
paddsw m1, m6
paddsw m2, m7
; r3 temporarily points at row 4 for the second group of loads.
lea r3, [pixelsq + 4*lsizeq]
LOAD_ZXBW_4 m3, m5, m6, m7, PASS4ROWS(r3, lsizeq, r2), m12
paddsw m4, m3
paddsw m11, m5
paddsw m9, m6
paddsw m10, m7
; Restore r3 = pixels + 3*lsize, as the store below expects.
lea r3, [pixelsq + r2]
%endif
; Pack two word rows into each register and write all eight rows back.
packuswb m8, m0
packuswb m1, m2
packuswb m4, m11
packuswb m9, m10
STORE_HI_LO PASS8ROWS(pixelsq, r3, lsizeq, r2), m8, m1, m4, m9
RET
; The idct10 and idct12 functions use the _hi coefficient tables.
define_constants _hi
; simple_idct10(block): row shift 12, column shift 19, both rounded with the
; pd_round_<shift> constants; result stored back into block.
cglobal simple_idct10, 1, 1, 16, block
IDCT_FN "", 12, "", 19, "store"
RET
; simple_idct10_put(pixels, lsize, block): same transform, output clamped to
; [0, pw_1023] and written to pixels as words.
cglobal simple_idct10_put, 3, 3, 16, pixels, lsize, block
IDCT_FN "", 12, "", 19, "put", 0, pw_1023
RET
; simple_idct12(block): row shift 15 rounded with pd_round_15, column pass
; biased with pw_2 and shifted by 16; result stored back into block.
cglobal simple_idct12, 1, 1, 16, block
; coeffs are already 15bits, adding the offset would cause
; overflow in the input
IDCT_FN "", 15, pw_2, 16, "store"
RET
; simple_idct12_put(pixels, lsize, block): same transform, output clamped to
; [0, pw_4095] and written to pixels as words.
cglobal simple_idct12_put, 3, 3, 16, pixels, lsize, block
; range isn't known, so the C simple_idct range is used
; Also, using a bias on input overflows, so use the bias
; on output of the first butterfly instead
IDCT_FN "", 15, pw_2, 16, "put", 0, pw_4095
RET
%endmacro
; Instantiate the whole function set for SSE2.
INIT_XMM sse2
idct_fn
; Instantiate it again for AVX, but only when the build configuration
; enables externally assembled AVX code.
%if HAVE_AVX_EXTERNAL
INIT_XMM avx
idct_fn
%endif
%endif

View File

@ -0,0 +1,369 @@
;******************************************************************************
;* x86-SIMD-optimized IDCT for prores
;* this is identical to "simple" IDCT written by Michael Niedermayer
;* except for the clip range
;*
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
; add SECTION_RODATA and proper include before including this file!
%if ARCH_X86_64
; Select which set of coefficient tables the unsuffixed names refer to.
; %1 = suffix appended to each table name (the including file passes _lo or
;      _hi)
; Only the eight tables listed here are remapped; IDCT_1D uses the remaining
; table names exactly as written.
%macro define_constants 1
; Drop any previous mapping so the macro can be invoked more than once.
%undef w4_plus_w2
%undef w4_min_w2
%undef w4_plus_w6
%undef w4_min_w6
%undef w1_plus_w3
%undef w3_min_w1
%undef w7_plus_w3
%undef w3_min_w7
%define w4_plus_w2 w4_plus_w2%1
%define w4_min_w2 w4_min_w2%1
%define w4_plus_w6 w4_plus_w6%1
%define w4_min_w6 w4_min_w6%1
%define w1_plus_w3 w1_plus_w3%1
%define w3_min_w1 w3_min_w1%1
%define w7_plus_w3 w7_plus_w3%1
%define w3_min_w7 w3_min_w7%1
%endmacro
; interleave data while maintaining source
; %1=type, %2=dstlo, %3=dsthi, %4=src, %5=interleave
; All register arguments are bare numbers (the m prefix is added here).
; Both unpacks read m%4 and m%5, so neither destination may alias them if the
; high half is to see the original values.
%macro SBUTTERFLY3 5
punpckl%1 m%2, m%4, m%5
punpckh%1 m%3, m%4, m%5
%endmacro
; Butterfly on two dword register pairs, followed by shift and pack.
; %1/%2=src1/dst1, %3/%4=dst2, %5/%6=src2, %7=shift
; action: %3/%4 = %1/%2 - %5/%6; %1/%2 += %5/%6
; %1/%2/%3/%4 >>= %7; dword -> word (in %1/%3)
; On exit %1 holds the eight sums and %3 the eight differences as saturated
; words; %2 and %4 are left holding shifted dword intermediates.
; The row[0]/row[7] notes below describe the first invocation in IDCT_1D.
%macro SUMSUB_SHPK 7
psubd %3, %1, %5 ; { a0 - b0 }[0-3]
psubd %4, %2, %6 ; { a0 - b0 }[4-7]
paddd %1, %5 ; { a0 + b0 }[0-3]
paddd %2, %6 ; { a0 + b0 }[4-7]
psrad %1, %7
psrad %2, %7
psrad %3, %7
psrad %4, %7
packssdw %1, %2 ; row[0]
packssdw %3, %4 ; row[7]
%endmacro
; One 1-D IDCT pass over eight registers of eight words each.
; %1 = initial bias ("" if nop)
; %2 = number of bits to shift at the end
; %3 = qmat (for prores)
; Inputs: m10 = row[0], m8 = row[2], m13 = row[4], m12 = row[6]; the odd rows
; are read from blockq+16/48/80/112.
; Outputs: rows 0..7 as words in m8, m0, m1, m2, m4, m11, m9, m10.
; The 128 bytes at blockq are overwritten with the dword even-part sums
; a0..a3, so the block contents are not preserved.
%macro IDCT_1D 2-3
; a0 = (W4 * row[0]) + (1 << (15 - 1));
; a1 = a0;
; a2 = a0;
; a3 = a0;
; a0 += W2 * row[2];
; a1 += W6 * row[2];
; a2 -= W6 * row[2];
; a3 -= W2 * row[2];
; Rounding: when %1 is a string, keep pd_round_<%2> in m15 and add it to the
; dword products below; otherwise add the word bias at [%1] to row[0] before
; multiplying.
%ifstr %1
mova m15, [pd_round_ %+ %2]
%else
paddw m10, [%1]
%endif
SBUTTERFLY3 wd, 0, 1, 10, 8 ; { row[0], row[2] }[0-3]/[4-7]
pmaddwd m2, m0, [w4_plus_w6]
pmaddwd m3, m1, [w4_plus_w6]
pmaddwd m4, m0, [w4_min_w6]
pmaddwd m5, m1, [w4_min_w6]
pmaddwd m6, m0, [w4_min_w2]
pmaddwd m7, m1, [w4_min_w2]
pmaddwd m0, [w4_plus_w2]
pmaddwd m1, [w4_plus_w2]
%ifstr %1
; Adding 1<<(%2-1) for >=15 bits values
paddd m2, m15
paddd m3, m15
paddd m4, m15
paddd m5, m15
paddd m6, m15
paddd m7, m15
paddd m0, m15
paddd m1, m15
%endif
; a0: -1*row[0]-1*row[2]
; a1: -1*row[0]
; a2: -1*row[0]
; a3: -1*row[0]+1*row[2]
; a0 += W4*row[4] + W6*row[6]; i.e. -1*row[4]
; a1 -= W4*row[4] + W2*row[6]; i.e. -1*row[4]-1*row[6]
; a2 -= W4*row[4] - W2*row[6]; i.e. -1*row[4]+1*row[6]
; a3 += W4*row[4] - W6*row[6]; i.e. -1*row[4]
SBUTTERFLY3 wd, 8, 9, 13, 12 ; { row[4], row[6] }[0-3]/[4-7]
pmaddwd m10, m8, [w4_plus_w6]
pmaddwd m11, m9, [w4_plus_w6]
paddd m0, m10 ; a0[0-3]
paddd m1, m11 ; a0[4-7]
pmaddwd m10, m8, [w4_min_w6]
pmaddwd m11, m9, [w4_min_w6]
paddd m6, m10 ; a3[0-3]
paddd m7, m11 ; a3[4-7]
pmaddwd m10, m8, [w4_min_w2]
pmaddwd m11, m9, [w4_min_w2]
pmaddwd m8, [w4_plus_w2]
pmaddwd m9, [w4_plus_w2]
psubd m4, m10 ; a2[0-3] intermediate
psubd m5, m11 ; a2[4-7] intermediate
psubd m2, m8 ; a1[0-3] intermediate
psubd m3, m9 ; a1[4-7] intermediate
; load/store
; Spill the even part to the block to free registers for the odd part.
; The low halves go to the even-row slots first; the high halves can only be
; written to the odd-row slots after the odd rows have been loaded.
mova [blockq+ 0], m0
mova [blockq+ 32], m2
mova [blockq+ 64], m4
mova [blockq+ 96], m6
mova m10,[blockq+ 16] ; { row[1] }[0-7]
mova m8, [blockq+ 48] ; { row[3] }[0-7]
mova m13,[blockq+ 80] ; { row[5] }[0-7]
mova m14,[blockq+112] ; { row[7] }[0-7]
mova [blockq+ 16], m1
mova [blockq+ 48], m3
mova [blockq+ 80], m5
mova [blockq+112], m7
; With a third argument, scale the odd rows by the matching qmat rows.
%if %0 == 3
pmullw m10,[%3+ 16]
pmullw m8, [%3+ 48]
pmullw m13,[%3+ 80]
pmullw m14,[%3+112]
%endif
; b0 = MUL(W1, row[1]);
; MAC(b0, W3, row[3]);
; b1 = MUL(W3, row[1]);
; MAC(b1, -W7, row[3]);
; b2 = MUL(W5, row[1]);
; MAC(b2, -W1, row[3]);
; b3 = MUL(W7, row[1]);
; MAC(b3, -W5, row[3]);
SBUTTERFLY3 wd, 0, 1, 10, 8 ; { row[1], row[3] }[0-3]/[4-7]
pmaddwd m2, m0, [w3_min_w7]
pmaddwd m3, m1, [w3_min_w7]
pmaddwd m4, m0, [w5_min_w1]
pmaddwd m5, m1, [w5_min_w1]
pmaddwd m6, m0, [w7_min_w5]
pmaddwd m7, m1, [w7_min_w5]
pmaddwd m0, [w1_plus_w3]
pmaddwd m1, [w1_plus_w3]
; b0: +1*row[1]+2*row[3]
; b1: +2*row[1]-1*row[3]
; b2: -1*row[1]-1*row[3]
; b3: +1*row[1]+1*row[3]
; MAC(b0, W5, row[5]);
; MAC(b0, W7, row[7]);
; MAC(b1, -W1, row[5]);
; MAC(b1, -W5, row[7]);
; MAC(b2, W7, row[5]);
; MAC(b2, W3, row[7]);
; MAC(b3, W3, row[5]);
; MAC(b3, -W1, row[7]);
SBUTTERFLY3 wd, 8, 9, 13, 14 ; { row[5], row[7] }[0-3]/[4-7]
; b0: -1*row[5]+1*row[7]
; b1: -1*row[5]+1*row[7]
; b2: +1*row[5]+2*row[7]
; b3: +2*row[5]-1*row[7]
pmaddwd m10, m8, [w1_plus_w5]
pmaddwd m11, m9, [w1_plus_w5]
pmaddwd m12, m8, [w5_plus_w7]
pmaddwd m13, m9, [w5_plus_w7]
psubd m2, m10 ; b1[0-3]
psubd m3, m11 ; b1[4-7]
paddd m0, m12 ; b0[0-3]
paddd m1, m13 ; b0[4-7]
pmaddwd m12, m8, [w7_plus_w3]
pmaddwd m13, m9, [w7_plus_w3]
pmaddwd m8, [w3_min_w1]
pmaddwd m9, [w3_min_w1]
paddd m4, m12 ; b2[0-3]
paddd m5, m13 ; b2[4-7]
paddd m6, m8 ; b3[0-3]
paddd m7, m9 ; b3[4-7]
; row[0] = (a0 + b0) >> 15;
; row[7] = (a0 - b0) >> 15;
; row[1] = (a1 + b1) >> 15;
; row[6] = (a1 - b1) >> 15;
; row[2] = (a2 + b2) >> 15;
; row[5] = (a2 - b2) >> 15;
; row[3] = (a3 + b3) >> 15;
; row[4] = (a3 - b3) >> 15;
; The shift actually applied is %2. Each step reloads one spilled a-term and
; reuses registers released by the previous step, so the order matters.
mova m8, [blockq+ 0] ; a0[0-3]
mova m9, [blockq+16] ; a0[4-7]
SUMSUB_SHPK m8, m9, m10, m11, m0, m1, %2
mova m0, [blockq+32] ; a1[0-3]
mova m1, [blockq+48] ; a1[4-7]
SUMSUB_SHPK m0, m1, m9, m11, m2, m3, %2
mova m1, [blockq+64] ; a2[0-3]
mova m2, [blockq+80] ; a2[4-7]
SUMSUB_SHPK m1, m2, m11, m3, m4, m5, %2
mova m2, [blockq+96] ; a3[0-3]
mova m3, [blockq+112] ; a3[4-7]
SUMSUB_SHPK m2, m3, m4, m5, m6, m7, %2
%endmacro
; Full 8x8 IDCT: one IDCT_1D pass, a transpose, a second IDCT_1D pass and an
; optional store or clamp-and-store step.
; NOTE(review): the prototype below is the ProRes entry point; it presumably
; corresponds to the 8-argument (qmat) form of this macro -- confirm.
; void ff_prores_idct_put_10_<opt>(uint8_t *pixels, ptrdiff_t stride,
; int16_t *block, const int16_t *qmat);
; Arguments %1/%2 and %3/%4 are forwarded unchanged to IDCT_1D, whose first
; argument is the bias and whose second is the shift:
; %1 = row bias ("" to round with pd_round_<%2> instead)
; %2 = row shift
; %3 = column bias ("" to round with pd_round_<%4> instead)
; %4 = column shift
; %5 = final action (omitted, "store" or "put"; no "add" branch exists here)
; %6 = min pixel value (0, or the label of a constant)
; %7 = max pixel value (label of a constant)
; %8 = qmat (for prores)
; When %5 is omitted the result is left in m8, m0, m1, m2, m4, m11, m9, m10
; (rows 0..7) for the caller to consume.
%macro IDCT_FN 4-8
; for (i = 0; i < 8; i++)
; idctRowCondDC(block + i*8);
mova m10,[blockq+ 0] ; { row[0] }[0-7]
mova m8, [blockq+32] ; { row[2] }[0-7]
mova m13,[blockq+64] ; { row[4] }[0-7]
mova m12,[blockq+96] ; { row[6] }[0-7]
%if %0 == 8
; Scale the even rows by qmat here; IDCT_1D scales the odd rows itself.
pmullw m10,[%8+ 0]
pmullw m8, [%8+32]
pmullw m13,[%8+64]
pmullw m12,[%8+96]
IDCT_1D %1, %2, %8
%elif %2 == 11
; This copies the DC-only shortcut. When there is only a DC coefficient the
; C shifts the value and splats it to all coeffs rather than multiplying and
; doing the full IDCT. This causes a difference on 8-bit because the
; coefficient is 16383 rather than 16384 (which you can get with shifting).
; m1 = OR of rows 1..7, compared against zero per word lane: all-ones in
; lanes whose only nonzero coefficient can be row[0].
por m1, m8, m13
por m1, m12
por m1, [blockq+ 16] ; { row[1] }[0-7]
por m1, [blockq+ 48] ; { row[3] }[0-7]
por m1, [blockq+ 80] ; { row[5] }[0-7]
por m1, [blockq+112] ; { row[7] }[0-7]
pxor m2, m2
pcmpeqw m1, m2
; m2 = row[0] << 3, kept only in the DC-only lanes.
psllw m2, m10, 3
pand m2, m1
; Invert the mask so it selects the lanes that need the full transform.
pcmpeqb m3, m3
pxor m1, m3
; IDCT_1D overwrites m1 and m2, so both values are parked on the stack. This
; is why callers using a row shift of 11 reserve 32 bytes of stack.
mova [rsp], m1
mova [rsp+16], m2
IDCT_1D %1, %2
mova m5, [rsp]
mova m6, [rsp+16]
; For every output row: keep the transform result in the full lanes and
; substitute the shifted DC value in the DC-only lanes.
pand m8, m5
por m8, m6
pand m0, m5
por m0, m6
pand m1, m5
por m1, m6
pand m2, m5
por m2, m6
pand m4, m5
por m4, m6
pand m11, m5
por m11, m6
pand m9, m5
por m9, m6
pand m10, m5
por m10, m6
%else
IDCT_1D %1, %2
%endif
; transpose for second part of IDCT
; TRANSPOSE8x8W comes from the included x86util.asm; the code below assumes it
; leaves the transposed rows 0..7 in the same registers, with m3 as scratch.
TRANSPOSE8x8W 8, 0, 1, 2, 4, 11, 9, 10, 3
; Put the odd rows (1, 3, 5, 7) where IDCT_1D loads them from.
mova [blockq+ 16], m0
mova [blockq+ 48], m2
mova [blockq+ 80], m11
mova [blockq+112], m10
; Rename registers so the even rows sit where IDCT_1D expects them:
; row 0 -> m10, row 2 -> m8, row 4 -> m13, row 6 -> m12.
SWAP 8, 10
SWAP 1, 8
SWAP 4, 13
SWAP 9, 12
; for (i = 0; i < 8; i++)
; idctSparseColAdd(dest + i, line_size, block + i);
IDCT_1D %3, %4
; clip/store
%if %0 >= 5
%ifidn %5,"store"
; No clamping, means pure idct
mova [blockq+ 0], m8
mova [blockq+ 16], m0
mova [blockq+ 32], m1
mova [blockq+ 48], m2
mova [blockq+ 64], m4
mova [blockq+ 80], m11
mova [blockq+ 96], m9
mova [blockq+112], m10
%elifidn %5,"put"
; m3 = lower clamp bound, m5 = upper clamp bound.
%ifidn %6, 0
pxor m3, m3
%else
mova m3, [%6]
%endif ; ifidn %6, 0
mova m5, [%7]
pmaxsw m8, m3
pmaxsw m0, m3
pmaxsw m1, m3
pmaxsw m2, m3
pmaxsw m4, m3
pmaxsw m11, m3
pmaxsw m9, m3
pmaxsw m10, m3
pminsw m8, m5
pminsw m0, m5
pminsw m1, m5
pminsw m2, m5
pminsw m4, m5
pminsw m11, m5
pminsw m9, m5
pminsw m10, m5
; Write eight rows of words: r0 is the destination, r1 the row pitch and
; r2 = 3*r1. The stores use mova, so every row address must be aligned.
lea r2, [r1*3]
mova [r0 ], m8
mova [r0+r1 ], m0
mova [r0+r1*2], m1
mova [r0+r2 ], m2
lea r0, [r0+r1*4]
mova [r0 ], m4
mova [r0+r1 ], m11
mova [r0+r1*2], m9
mova [r0+r2 ], m10
%endif ; %5 action
%endif; if %0 >= 5
%endmacro
%endif

View File

@ -0,0 +1,36 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_XVIDIDCT_H
#define AVCODEC_XVIDIDCT_H
#include <stdint.h>
#include "avcodec.h"
#include "idctdsp.h"
/*
 * Xvid IDCT on a block of 16-bit coefficients.
 * NOTE(review): there is no separate output argument, so the transform is
 * presumably done in place on `in` -- confirm against the definition.
 */
void ff_xvid_idct(int16_t *const in);
/*
 * Set up the IDCT DSP context `c` for the codec context `avctx`.
 * NOTE(review): presumably installs the Xvid IDCT function pointers into `c`
 * and then defers to the per-architecture initializers below -- confirm
 * against the definition.
 */
void ff_xvid_idct_init(IDCTDSPContext *c, AVCodecContext *avctx);
/*
 * Architecture-specific initializers (x86 and MIPS). They take the same
 * context arguments plus `high_bit_depth`.
 * NOTE(review): `high_bit_depth` is presumably nonzero when samples are wider
 * than 8 bits, in which case the optimized routines may not apply -- confirm.
 */
void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth);
void ff_xvid_idct_init_mips(IDCTDSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth);
#endif /* AVCODEC_XVIDIDCT_H */