diff --git a/common.mak b/common.mak index 172664011c..dd5de7a508 100644 --- a/common.mak +++ b/common.mak @@ -124,4 +124,7 @@ CLEANSUFFIXES = *.d *.o *~ *.h.c *.map *.ver *.ho *.gcno *.gcda DISTCLEANSUFFIXES = *.pc LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a +clean:: + $(RM) $(OBJS) $(OBJS:.o=.d) + -include $(wildcard $(OBJS:.o=.d) $(HOSTOBJS:.o=.d) $(TESTOBJS:.o=.d) $(HOBJS:.o=.d)) diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index a8e531cf18..745a5bdfe2 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -8,6 +8,9 @@ OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_init_arm.o \ ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o +OBJS-$(CONFIG_FLAC_DECODER) += arm/flacdsp_init_arm.o \ + arm/flacdsp_arm.o \ + OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o diff --git a/libavcodec/arm/flacdsp_arm.S b/libavcodec/arm/flacdsp_arm.S new file mode 100644 index 0000000000..f8861c5967 --- /dev/null +++ b/libavcodec/arm/flacdsp_arm.S @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2012 Mans Rullgard + * + * This file is part of FFmpeg + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/arm/asm.S" + +function flac_lpc_16_1_arm + ldr r12, [sp] + push {r4, lr} + ldr r1, [r1] + subs r12, r12, #2 + ldr lr, [r0], #4 + beq 2f + it lt + poplt {r4, pc} +1: + mul r4, lr, r1 + ldm r0, {r2, lr} + add_sh r2, r2, r4, asr r3 + mul r4, r2, r1 + subs r12, r12, #2 + add_sh lr, lr, r4, asr r3 + stm r0!, {r2, lr} + bgt 1b + it lt + poplt {r4, pc} +2: + mul r4, lr, r1 + ldr r2, [r0] + add_sh r2, r2, r4, asr r3 + str r2, [r0] + pop {r4, pc} +endfunc + +function flac_lpc_16_2_arm + ldr r12, [sp] + subs r12, r12, r2 + it le + bxle lr + + push {r4-r9, lr} + ldm r0!, {r6, r7} + ldm r1, {r8, r9} + subs r12, r12, #1 + beq 2f +1: + mul r4, r6, r8 + mul r5, r7, r8 + mla r4, r7, r9, r4 + ldm r0, {r6, r7} + add_sh r6, r6, r4, asr r3 + mla r5, r6, r9, r5 + add_sh r7, r7, r5, asr r3 + stm r0!, {r6, r7} + subs r12, r12, #2 + bgt 1b + it lt + poplt {r4-r9, pc} +2: + mul r4, r6, r8 + mla r4, r7, r9, r4 + ldr r5, [r0] + add_sh r5, r5, r4, asr r3 + str r5, [r0] + pop {r4-r9, pc} +endfunc + +function ff_flac_lpc_16_arm, export=1 + cmp r2, #2 + blt flac_lpc_16_1_arm + beq flac_lpc_16_2_arm + + ldr r12, [sp] + subs r12, r12, r2 + it le + bxle lr + + push {r4-r9, lr} + + subs r12, r12, #1 + beq 3f +1: + sub lr, r2, #2 + mov r4, #0 + mov r5, #0 + + ldr r7, [r0], #4 + ldr r9, [r1], #4 +2: + mla r4, r7, r9, r4 + ldm r0!, {r6, r7} + mla r5, r6, r9, r5 + ldm r1!, {r8, r9} + mla r4, r6, r8, r4 + subs lr, lr, #2 + mla r5, r7, r8, r5 + bgt 2b + blt 6f + + mla r4, r7, r9, r4 + ldr r7, [r0], #4 + mla r5, r7, r9, r5 + ldr r9, [r1], #4 +6: + mla r4, r7, r9, r4 + ldm r0, {r6, r7} + add_sh r6, r6, r4, asr r3 + mla r5, r6, r9, r5 + add_sh r7, r7, r5, asr r3 + stm r0!, {r6, r7} + sub r0, r0, r2, lsl #2 + sub r1, r1, r2, lsl #2 + + subs r12, r12, #2 + bgt 1b + it lt + poplt {r4-r9, pc} +3: + mov r4, #0 +4: + ldr r5, [r1], #4 + ldr r6, [r0], #4 + mla r4, r5, r6, r4 + subs r2, r2, #1 + bgt 4b + ldr r5, [r0] + add_sh r5, r5, r4, asr r3 + str r5, [r0] + pop {r4-r9, pc} +endfunc diff --git a/libavcodec/arm/flacdsp_init_arm.c b/libavcodec/arm/flacdsp_init_arm.c new file mode 100644 index 0000000000..9b9394280f --- /dev/null +++ b/libavcodec/arm/flacdsp_init_arm.c @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2012 Mans Rullgard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/flacdsp.h" +#include "config.h" + +void ff_flac_lpc_16_arm(int32_t *samples, const int coeffs[32], int order, + int qlevel, int len); + +av_cold void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, + int bps) +{ + if (bps <= 16) + c->lpc = ff_flac_lpc_16_arm; +} diff --git a/libavcodec/bmp.c b/libavcodec/bmp.c index e97038c5fe..7e160ae556 100644 --- a/libavcodec/bmp.c +++ b/libavcodec/bmp.c @@ -232,9 +232,6 @@ static int bmp_decode_frame(AVCodecContext *avctx, if(comp == BMP_RLE4 || comp == BMP_RLE8) memset(p->data[0], 0, avctx->height * p->linesize[0]); - if(depth == 4 || depth == 8) - memset(p->data[1], 0, 1024); - if(height > 0){ ptr = p->data[0] + (avctx->height - 1) * p->linesize[0]; linesize = -p->linesize[0]; @@ -245,6 +242,9 @@ static int bmp_decode_frame(AVCodecContext *avctx, if(avctx->pix_fmt == PIX_FMT_PAL8){ int colors = 1 << depth; + + memset(p->data[1], 0, 1024); + if(ihsize >= 36){ int t; buf = buf0 + 46; diff --git a/libavcodec/flacdsp.c b/libavcodec/flacdsp.c index 6c2458fc10..e51a91a07c 100644 --- a/libavcodec/flacdsp.c +++ b/libavcodec/flacdsp.c @@ -21,6 +21,7 @@ #include "libavutil/attributes.h" #include "libavutil/samplefmt.h" #include "flacdsp.h" +#include "config.h" #define SAMPLE_SIZE 16 #define PLANAR 0 @@ -119,4 +120,7 @@ av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, c->decorrelate[3] = flac_decorrelate_ms_c_16p; break; } + + if (ARCH_ARM) + ff_flacdsp_init_arm(c, fmt, bps); } diff --git a/libavcodec/flacdsp.h b/libavcodec/flacdsp.h index efb5e02ef4..00be2659ce 100644 --- a/libavcodec/flacdsp.h +++ b/libavcodec/flacdsp.h @@ -30,5 +30,6 @@ typedef struct FLACDSPContext { } FLACDSPContext; void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int bps); +void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int bps); #endif /* AVCODEC_FLACDSP_H */ diff --git a/libavcodec/mpegvideo_motion.c b/libavcodec/mpegvideo_motion.c index 354b60b386..2e5f7e611e 100644 --- a/libavcodec/mpegvideo_motion.c +++ b/libavcodec/mpegvideo_motion.c @@ -30,9 +30,9 @@ #include "msmpeg4.h" #include -static inline void gmc1_motion(MpegEncContext *s, - uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, - uint8_t **ref_picture) +static void gmc1_motion(MpegEncContext *s, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + uint8_t **ref_picture) { uint8_t *ptr; int offset, src_x, src_y, linesize, uvlinesize; @@ -116,9 +116,9 @@ static inline void gmc1_motion(MpegEncContext *s, return; } -static inline void gmc_motion(MpegEncContext *s, - uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, - uint8_t **ref_picture) +static void gmc_motion(MpegEncContext *s, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + uint8_t **ref_picture) { uint8_t *ptr; int linesize, uvlinesize; @@ -174,11 +174,8 @@ static inline void gmc_motion(MpegEncContext *s, static inline int hpel_motion(MpegEncContext *s, uint8_t *dest, uint8_t *src, - int field_based, int field_select, int src_x, int src_y, - int width, int height, int stride, - int h_edge_pos, int v_edge_pos, - int w, int h, op_pixels_func *pix_op, + op_pixels_func *pix_op, int motion_x, int motion_y) { int dxy; @@ -189,26 +186,24 @@ static inline int hpel_motion(MpegEncContext *s, src_y += motion_y >> 1; /* WARNING: do no forget half pels */ - src_x = av_clip(src_x, -16, width); //FIXME unneeded for emu? - if (src_x == width) + src_x = av_clip(src_x, -16, s->width); //FIXME unneeded for emu? + if (src_x == s->width) dxy &= ~1; - src_y = av_clip(src_y, -16, height); - if (src_y == height) + src_y = av_clip(src_y, -16, s->height); + if (src_y == s->height) dxy &= ~2; - src += src_y * stride + src_x; + src += src_y * s->linesize + src_x; if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){ - if( (unsigned)src_x > FFMAX(h_edge_pos - (motion_x&1) - w, 0) - || (unsigned)src_y > FFMAX(v_edge_pos - (motion_y&1) - h, 0)){ - s->dsp.emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<v_edge_pos); + if( (unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x&1) - 8, 0) + || (unsigned)src_y > FFMAX(s->v_edge_pos - (motion_y&1) - 8, 0)){ + s->dsp.emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, 9, 9, + src_x, src_y, s->h_edge_pos, s->v_edge_pos); src= s->edge_emu_buffer; emu=1; } } - if(field_select) - src += s->linesize; - pix_op[dxy](dest, src, stride, h); + pix_op[dxy](dest, src, s->linesize, 8); return emu; } @@ -447,11 +442,9 @@ static inline void obmc_motion(MpegEncContext *s, ptr[i]= ptr[MID]; }else{ ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1); - hpel_motion(s, ptr[i], src, 0, 0, + hpel_motion(s, ptr[i], src, src_x, src_y, - s->width, s->height, s->linesize, - s->h_edge_pos, s->v_edge_pos, - 8, 8, pix_op, + pix_op, mv[i][0], mv[i][1]); } } @@ -554,11 +547,12 @@ static inline void qpel_motion(MpegEncContext *s, /** * h263 chroma 4mv motion compensation. */ -static inline void chroma_4mv_motion(MpegEncContext *s, - uint8_t *dest_cb, uint8_t *dest_cr, - uint8_t **ref_picture, - op_pixels_func *pix_op, - int mx, int my){ +static void chroma_4mv_motion(MpegEncContext *s, + uint8_t *dest_cb, uint8_t *dest_cr, + uint8_t **ref_picture, + op_pixels_func *pix_op, + int mx, int my) +{ int dxy, emu=0, src_x, src_y, offset; uint8_t *ptr; @@ -773,11 +767,9 @@ static av_always_inline void MPV_motion_internal(MpegEncContext *s, }else{ for(i=0;i<4;i++) { hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize, - ref_picture[0], 0, 0, + ref_picture[0], mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8, - s->width, s->height, s->linesize, - s->h_edge_pos, s->v_edge_pos, - 8, 8, pix_op[1], + pix_op[1], s->mv[dir][i][0], s->mv[dir][i][1]); mx += s->mv[dir][i][0]; diff --git a/libavcodec/nellymoserdec.c b/libavcodec/nellymoserdec.c index 20ab8f84cb..ae98bf5c31 100644 --- a/libavcodec/nellymoserdec.c +++ b/libavcodec/nellymoserdec.c @@ -48,13 +48,11 @@ typedef struct NellyMoserDecodeContext { AVCodecContext* avctx; AVFrame frame; - float *float_buf; AVLFG random_state; GetBitContext gb; float scale_bias; DSPContext dsp; FFTContext imdct_ctx; - FmtConvertContext fmt_conv; DECLARE_ALIGNED(32, float, imdct_buf)[2][NELLY_BUF_LEN]; float *imdct_out; float *imdct_prev; @@ -124,19 +122,8 @@ static av_cold int decode_init(AVCodecContext * avctx) { ff_dsputil_init(&s->dsp, avctx); - if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) { - s->scale_bias = 1.0/(32768*8); - avctx->sample_fmt = AV_SAMPLE_FMT_FLT; - } else { - s->scale_bias = 1.0/(1*8); - avctx->sample_fmt = AV_SAMPLE_FMT_S16; - ff_fmt_convert_init(&s->fmt_conv, avctx); - s->float_buf = av_mallocz(NELLY_SAMPLES * sizeof(*s->float_buf)); - if (!s->float_buf) { - av_log(avctx, AV_LOG_ERROR, "error allocating float buffer\n"); - return AVERROR(ENOMEM); - } - } + s->scale_bias = 1.0/(32768*8); + avctx->sample_fmt = AV_SAMPLE_FMT_FLT; /* Generate overlap window */ if (!ff_sine_128[127]) @@ -158,7 +145,6 @@ static int decode_tag(AVCodecContext *avctx, void *data, int buf_size = avpkt->size; NellyMoserDecodeContext *s = avctx->priv_data; int blocks, i, ret; - int16_t *samples_s16; float *samples_flt; blocks = buf_size / NELLY_BLOCK_LEN; @@ -188,18 +174,11 @@ static int decode_tag(AVCodecContext *avctx, void *data, av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); return ret; } - samples_s16 = (int16_t *)s->frame.data[0]; samples_flt = (float *)s->frame.data[0]; for (i=0 ; isample_fmt == AV_SAMPLE_FMT_FLT) { - nelly_decode_block(s, buf, samples_flt); - samples_flt += NELLY_SAMPLES; - } else { - nelly_decode_block(s, buf, s->float_buf); - s->fmt_conv.float_to_int16(samples_s16, s->float_buf, NELLY_SAMPLES); - samples_s16 += NELLY_SAMPLES; - } + nelly_decode_block(s, buf, samples_flt); + samples_flt += NELLY_SAMPLES; buf += NELLY_BLOCK_LEN; } @@ -212,7 +191,6 @@ static int decode_tag(AVCodecContext *avctx, void *data, static av_cold int decode_end(AVCodecContext * avctx) { NellyMoserDecodeContext *s = avctx->priv_data; - av_freep(&s->float_buf); ff_mdct_end(&s->imdct_ctx); return 0; @@ -229,6 +207,5 @@ AVCodec ff_nellymoser_decoder = { .capabilities = CODEC_CAP_DR1 | CODEC_CAP_PARAM_CHANGE, .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"), .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLT, - AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, }; diff --git a/libavutil/arm/asm.S b/libavutil/arm/asm.S index ccb52316c7..340ee18e8a 100644 --- a/libavutil/arm/asm.S +++ b/libavutil/arm/asm.S @@ -186,6 +186,12 @@ ELF .size \name, . - \name #endif .endm +.macro add_sh rd, rn, rm, sh:vararg +A add \rd, \rn, \rm, \sh +T mov \rm, \rm, \sh +T add \rd, \rn, \rm +.endm + .macro ldr_pre rt, rn, rm:vararg A ldr \rt, [\rn, \rm]! T add \rn, \rn, \rm