diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 943e5db511..fad56129a3 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -286,7 +286,7 @@ OBJS-$(CONFIG_EIGHTSVX_FIB_DECODER) += 8svx.o OBJS-$(CONFIG_ESCAPE124_DECODER) += escape124.o OBJS-$(CONFIG_ESCAPE130_DECODER) += escape130.o OBJS-$(CONFIG_EVRC_DECODER) += evrcdec.o acelp_vectors.o lsp.o -OBJS-$(CONFIG_EXR_DECODER) += exr.o +OBJS-$(CONFIG_EXR_DECODER) += exr.o exrdsp.o OBJS-$(CONFIG_FFV1_DECODER) += ffv1dec.o ffv1.o OBJS-$(CONFIG_FFV1_ENCODER) += ffv1enc.o ffv1.o OBJS-$(CONFIG_FFWAVESYNTH_DECODER) += ffwavesynth.o diff --git a/libavcodec/exr.c b/libavcodec/exr.c index 759880756d..de2f05d3a9 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -51,6 +51,7 @@ #include "bswapdsp.h" #endif +#include "exrdsp.h" #include "get_bits.h" #include "internal.h" #include "mathops.h" @@ -121,6 +122,7 @@ typedef struct EXRContext { AVClass *class; AVFrame *picture; AVCodecContext *avctx; + ExrDSPContext dsp; #if HAVE_BIGENDIAN BswapDSPContext bbdsp; @@ -275,23 +277,7 @@ static void predictor(uint8_t *src, int size) } } -static void reorder_pixels(uint8_t *src, uint8_t *dst, int size) -{ - const uint8_t *t1 = src; - int half_size = size / 2; - const uint8_t *t2 = src + half_size; - uint8_t *s = dst; - int i; - - av_assert1(size % 2 == 0); - - for (i = 0; i < half_size; i++) { - *(s++) = *(t1++); - *(s++) = *(t2++); - } -} - -static int zip_uncompress(const uint8_t *src, int compressed_size, +static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size, int uncompressed_size, EXRThreadData *td) { unsigned long dest_len = uncompressed_size; @@ -300,13 +286,15 @@ static int zip_uncompress(const uint8_t *src, int compressed_size, dest_len != uncompressed_size) return AVERROR_INVALIDDATA; + av_assert1(uncompressed_size % 2 == 0); + predictor(td->tmp, uncompressed_size); - reorder_pixels(td->tmp, td->uncompressed_data, uncompressed_size); + s->dsp.reorder_pixels(td->tmp, 
td->uncompressed_data, uncompressed_size); return 0; } -static int rle_uncompress(const uint8_t *src, int compressed_size, +static int rle_uncompress(EXRContext *ctx, const uint8_t *src, int compressed_size, int uncompressed_size, EXRThreadData *td) { uint8_t *d = td->tmp; @@ -345,8 +333,10 @@ static int rle_uncompress(const uint8_t *src, int compressed_size, if (dend != d) return AVERROR_INVALIDDATA; + av_assert1(uncompressed_size % 2 == 0); + predictor(td->tmp, uncompressed_size); - reorder_pixels(td->tmp, td->uncompressed_data, uncompressed_size); + ctx->dsp.reorder_pixels(td->tmp, td->uncompressed_data, uncompressed_size); return 0; } @@ -1152,7 +1142,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata, if (data_size < uncompressed_size) { av_fast_padded_malloc(&td->uncompressed_data, - &td->uncompressed_size, uncompressed_size); + &td->uncompressed_size, uncompressed_size + 64);/* Force 64 padding for AVX2 reorder_pixels dst */ if (!td->uncompressed_data) return AVERROR(ENOMEM); @@ -1161,7 +1151,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata, switch (s->compression) { case EXR_ZIP1: case EXR_ZIP16: - ret = zip_uncompress(src, data_size, uncompressed_size, td); + ret = zip_uncompress(s, src, data_size, uncompressed_size, td); break; case EXR_PIZ: ret = piz_uncompress(s, src, data_size, uncompressed_size, td); @@ -1170,7 +1160,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata, ret = pxr24_uncompress(s, src, data_size, uncompressed_size, td); break; case EXR_RLE: - ret = rle_uncompress(src, data_size, uncompressed_size, td); + ret = rle_uncompress(s, src, data_size, uncompressed_size, td); break; case EXR_B44: case EXR_B44A: @@ -1804,6 +1794,8 @@ static av_cold int decode_init(AVCodecContext *avctx) s->avctx = avctx; + ff_exrdsp_init(&s->dsp); + #if HAVE_BIGENDIAN ff_bswapdsp_init(&s->bbdsp); #endif diff --git a/libavcodec/exrdsp.c b/libavcodec/exrdsp.c new file mode 100644 index 0000000000..e59dac3dc4 --- /dev/null +++ 
b/libavcodec/exrdsp.c
@@ -0,0 +1,66 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "exrdsp.h"
+#include "config.h"
+
+/**
+ * Portable C implementation of the reorder_pixels DSP entry.
+ *
+ * Interleaves the two halves of src into dst:
+ * dst[2 * i] = src[i] and dst[2 * i + 1] = src[size / 2 + i]
+ * for every i in [0, size / 2).
+ *
+ * @param src  input bytes, read as two consecutive halves
+ * @param dst  output buffer, receives 2 * (size / 2) bytes
+ * @param size length of src in bytes, expected to be even
+ */
+static void reorder_pixels_scalar(uint8_t *src, uint8_t *dst, ptrdiff_t size)
+{
+    const uint8_t *t1 = src;
+    /* Keep the length at full ptrdiff_t width instead of narrowing it to int. */
+    ptrdiff_t half_size = size / 2;
+    const uint8_t *t2 = src + half_size;
+    uint8_t *s = dst;
+    ptrdiff_t i;
+
+    for (i = 0; i < half_size; i++) {
+        *(s++) = *(t1++);
+        *(s++) = *(t2++);
+    }
+}
+
+/**
+ * Install the C implementation, then let the x86 init override it when
+ * ARCH_X86 is set.
+ *
+ * @param c DSP context to fill in
+ */
+av_cold void ff_exrdsp_init(ExrDSPContext *c)
+{
+    c->reorder_pixels = reorder_pixels_scalar;
+
+    if (ARCH_X86)
+        ff_exrdsp_init_x86(c);
+}
diff --git a/libavcodec/exrdsp.h b/libavcodec/exrdsp.h
new file mode 100644
index 0000000000..09a76a518e
--- /dev/null
+++ b/libavcodec/exrdsp.h
@@ -0,0 +1,32 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_EXRDSP_H
+#define AVCODEC_EXRDSP_H
+
+#include <stdint.h>
+#include "libavutil/common.h"
+
+typedef struct ExrDSPContext { /* function table for the EXR decoder's DSP routines */
+    void (*reorder_pixels)(uint8_t *src, uint8_t *dst, ptrdiff_t size); /* interleave the two size/2-byte halves of src into dst */
+} ExrDSPContext;
+
+void ff_exrdsp_init(ExrDSPContext *c);     /* set the C implementation, then call the x86 init when ARCH_X86 */
+void ff_exrdsp_init_x86(ExrDSPContext *c); /* override entries according to the runtime CPU flags */
+
+#endif /* AVCODEC_EXRDSP_H */
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index e36644c72a..a805cd37b4 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -52,6 +52,7 @@ OBJS-$(CONFIG_APNG_DECODER) += x86/pngdsp_init.o
 OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp.o
 OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp_init.o x86/synth_filter_init.o
 OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc_init.o
+OBJS-$(CONFIG_EXR_DECODER) += x86/exrdsp_init.o
 OBJS-$(CONFIG_OPUS_DECODER) += x86/opus_dsp_init.o
 OBJS-$(CONFIG_OPUS_ENCODER) += x86/opus_dsp_init.o
 OBJS-$(CONFIG_HEVC_DECODER) += x86/hevcdsp_init.o
@@ -153,6 +154,7 @@ X86ASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o x86/synth_filter.o
 X86ASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp.o \
 x86/dirac_dwt.o
 X86ASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o
+X86ASM-OBJS-$(CONFIG_EXR_DECODER) += x86/exrdsp.o
 X86ASM-OBJS-$(CONFIG_FLAC_DECODER) += x86/flacdsp.o
 ifdef CONFIG_GPL
 X86ASM-OBJS-$(CONFIG_FLAC_ENCODER) += x86/flac_dsp_gpl.o
diff --git a/libavcodec/x86/exrdsp.asm b/libavcodec/x86/exrdsp.asm
new file mode 100644
index 0000000000..91d9c0b0a7
--- /dev/null
+++ b/libavcodec/x86/exrdsp.asm
@@ -0,0 +1,63 @@
+;******************************************************************************
+;* X86 Optimized functions for Open Exr Decoder
+;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
+;*
+;* reorder_pixels based on patch by John Loy
+;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+;------------------------------------------------------------------------------
+; void ff_reorder_pixels(uint8_t *src, uint8_t *dst, ptrdiff_t size): interleave the two halves of src into dst
+;------------------------------------------------------------------------------
+
+%macro REORDER_PIXELS 0
+cglobal reorder_pixels, 3,4,3, src1, dst, size, src2
+    lea               src2q, [src1q+sizeq]        ; src2 = src + size (end of the second half)
+    add                dstq, sizeq                ; dst  = dst + size (end of the output)
+    shr               sizeq, 1                    ; size = half_size
+    add               src1q, sizeq                ; src1 = src + half_size (end of the first half)
+    neg               sizeq                       ; size = -half_size: negative index shared by dst, src1, src2
+.loop:
+
+%if cpuflag(avx2)
+    vpermq               m0, [src1q + sizeq], 0xd8; load first part, qwords reordered 0,2,1,3 (presumably so the unpack in SBUTTERFLY yields in-order output)
+    vpermq               m1, [src2q + sizeq], 0xd8; load second part, same qword reorder
+%else
+    mova                 m0, [src1q+sizeq]        ; load first part (aligned load)
+    movu                 m1, [src2q+sizeq]        ; load second part (unaligned load)
+%endif
+    SBUTTERFLY bw, 0, 1, 2                        ; interleave the bytes of m0 and m1, m2 is scratch
+    mova  [dstq+2*sizeq       ], m0               ; store first output vector; dst index is twice the src index
+    mova  [dstq+2*sizeq+mmsize], m1               ; store second output vector
+    add               sizeq, mmsize               ; one vector of each half consumed per iteration
+    jl .loop                                      ; loop while the index is negative; whole vectors only, so the last pass may run past size
+    RET
+%endmacro
+
+INIT_XMM sse2
+REORDER_PIXELS
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+REORDER_PIXELS
+%endif
diff --git a/libavcodec/x86/exrdsp_init.c b/libavcodec/x86/exrdsp_init.c
new file mode 100644
index 0000000000..c0f508b2c4
--- /dev/null
+++ b/libavcodec/x86/exrdsp_init.c
@@ -0,0 +1,39 @@
+/*
+ * x86 DSP initialization for the OpenEXR (.exr) image decoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/exrdsp.h"
+
+void ff_reorder_pixels_sse2(uint8_t *src, uint8_t *dst, ptrdiff_t size); /* REORDER_PIXELS built under INIT_XMM sse2 */
+
+void ff_reorder_pixels_avx2(uint8_t *src, uint8_t *dst, ptrdiff_t size); /* REORDER_PIXELS built under INIT_YMM avx2 */
+
+av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp) /* replace reorder_pixels with an assembly version the CPU supports */
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        dsp->reorder_pixels = ff_reorder_pixels_sse2;
+    }
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) { /* tested after SSE2, so AVX2 wins when both are available */
+        dsp->reorder_pixels = ff_reorder_pixels_avx2;
+    }
+}