mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2024-11-27 21:40:34 +00:00
libavcodec/exr : add X86 SIMD for reorder_pixels
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
08ec828de9
commit
9b8c1224d7
@ -286,7 +286,7 @@ OBJS-$(CONFIG_EIGHTSVX_FIB_DECODER) += 8svx.o
|
||||
OBJS-$(CONFIG_ESCAPE124_DECODER) += escape124.o
|
||||
OBJS-$(CONFIG_ESCAPE130_DECODER) += escape130.o
|
||||
OBJS-$(CONFIG_EVRC_DECODER) += evrcdec.o acelp_vectors.o lsp.o
|
||||
OBJS-$(CONFIG_EXR_DECODER) += exr.o
|
||||
OBJS-$(CONFIG_EXR_DECODER) += exr.o exrdsp.o
|
||||
OBJS-$(CONFIG_FFV1_DECODER) += ffv1dec.o ffv1.o
|
||||
OBJS-$(CONFIG_FFV1_ENCODER) += ffv1enc.o ffv1.o
|
||||
OBJS-$(CONFIG_FFWAVESYNTH_DECODER) += ffwavesynth.o
|
||||
|
@ -51,6 +51,7 @@
|
||||
#include "bswapdsp.h"
|
||||
#endif
|
||||
|
||||
#include "exrdsp.h"
|
||||
#include "get_bits.h"
|
||||
#include "internal.h"
|
||||
#include "mathops.h"
|
||||
@ -121,6 +122,7 @@ typedef struct EXRContext {
|
||||
AVClass *class;
|
||||
AVFrame *picture;
|
||||
AVCodecContext *avctx;
|
||||
ExrDSPContext dsp;
|
||||
|
||||
#if HAVE_BIGENDIAN
|
||||
BswapDSPContext bbdsp;
|
||||
@ -275,23 +277,7 @@ static void predictor(uint8_t *src, int size)
|
||||
}
|
||||
}
|
||||
|
||||
static void reorder_pixels(uint8_t *src, uint8_t *dst, int size)
|
||||
{
|
||||
const uint8_t *t1 = src;
|
||||
int half_size = size / 2;
|
||||
const uint8_t *t2 = src + half_size;
|
||||
uint8_t *s = dst;
|
||||
int i;
|
||||
|
||||
av_assert1(size % 2 == 0);
|
||||
|
||||
for (i = 0; i < half_size; i++) {
|
||||
*(s++) = *(t1++);
|
||||
*(s++) = *(t2++);
|
||||
}
|
||||
}
|
||||
|
||||
static int zip_uncompress(const uint8_t *src, int compressed_size,
|
||||
static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size,
|
||||
int uncompressed_size, EXRThreadData *td)
|
||||
{
|
||||
unsigned long dest_len = uncompressed_size;
|
||||
@ -300,13 +286,15 @@ static int zip_uncompress(const uint8_t *src, int compressed_size,
|
||||
dest_len != uncompressed_size)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
av_assert1(uncompressed_size % 2 == 0);
|
||||
|
||||
predictor(td->tmp, uncompressed_size);
|
||||
reorder_pixels(td->tmp, td->uncompressed_data, uncompressed_size);
|
||||
s->dsp.reorder_pixels(td->tmp, td->uncompressed_data, uncompressed_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rle_uncompress(const uint8_t *src, int compressed_size,
|
||||
static int rle_uncompress(EXRContext *ctx, const uint8_t *src, int compressed_size,
|
||||
int uncompressed_size, EXRThreadData *td)
|
||||
{
|
||||
uint8_t *d = td->tmp;
|
||||
@ -345,8 +333,10 @@ static int rle_uncompress(const uint8_t *src, int compressed_size,
|
||||
if (dend != d)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
av_assert1(uncompressed_size % 2 == 0);
|
||||
|
||||
predictor(td->tmp, uncompressed_size);
|
||||
reorder_pixels(td->tmp, td->uncompressed_data, uncompressed_size);
|
||||
ctx->dsp.reorder_pixels(td->tmp, td->uncompressed_data, uncompressed_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1152,7 +1142,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata,
|
||||
|
||||
if (data_size < uncompressed_size) {
|
||||
av_fast_padded_malloc(&td->uncompressed_data,
|
||||
&td->uncompressed_size, uncompressed_size);
|
||||
&td->uncompressed_size, uncompressed_size + 64);/* Force 64 padding for AVX2 reorder_pixels dst */
|
||||
|
||||
if (!td->uncompressed_data)
|
||||
return AVERROR(ENOMEM);
|
||||
@ -1161,7 +1151,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata,
|
||||
switch (s->compression) {
|
||||
case EXR_ZIP1:
|
||||
case EXR_ZIP16:
|
||||
ret = zip_uncompress(src, data_size, uncompressed_size, td);
|
||||
ret = zip_uncompress(s, src, data_size, uncompressed_size, td);
|
||||
break;
|
||||
case EXR_PIZ:
|
||||
ret = piz_uncompress(s, src, data_size, uncompressed_size, td);
|
||||
@ -1170,7 +1160,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata,
|
||||
ret = pxr24_uncompress(s, src, data_size, uncompressed_size, td);
|
||||
break;
|
||||
case EXR_RLE:
|
||||
ret = rle_uncompress(src, data_size, uncompressed_size, td);
|
||||
ret = rle_uncompress(s, src, data_size, uncompressed_size, td);
|
||||
break;
|
||||
case EXR_B44:
|
||||
case EXR_B44A:
|
||||
@ -1804,6 +1794,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
|
||||
|
||||
s->avctx = avctx;
|
||||
|
||||
ff_exrdsp_init(&s->dsp);
|
||||
|
||||
#if HAVE_BIGENDIAN
|
||||
ff_bswapdsp_init(&s->bbdsp);
|
||||
#endif
|
||||
|
47
libavcodec/exrdsp.c
Normal file
47
libavcodec/exrdsp.c
Normal file
@ -0,0 +1,47 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "exrdsp.h"
|
||||
#include "config.h"
|
||||
|
||||
/**
 * Portable C implementation of the EXR byte reordering step.
 *
 * Interleaves the first and second half of src into dst:
 *     dst[2 * i]     = src[i]
 *     dst[2 * i + 1] = src[size / 2 + i]      for 0 <= i < size / 2
 *
 * @param src  input buffer; 2 * (size / 2) bytes are read
 * @param dst  output buffer; 2 * (size / 2) bytes are written
 * @param size number of bytes to reorder; when odd, the trailing byte is
 *             neither read nor written
 */
static void reorder_pixels_scalar(uint8_t *src, uint8_t *dst, ptrdiff_t size)
{
    /* Keep every index in ptrdiff_t: size is a ptrdiff_t, and narrowing
     * half_size or the loop counter to int would truncate large sizes. */
    const ptrdiff_t half_size = size / 2;
    const uint8_t *first  = src;             /* even output bytes */
    const uint8_t *second = src + half_size; /* odd output bytes  */
    ptrdiff_t i;

    for (i = 0; i < half_size; i++) {
        dst[2 * i]     = first[i];
        dst[2 * i + 1] = second[i];
    }
}
|
||||
|
||||
/**
 * Fill an ExrDSPContext with function pointers.
 *
 * The C implementation is installed first; on x86 builds the x86 init
 * routine is then given the chance to replace it.
 *
 * @param c context to initialize
 */
av_cold void ff_exrdsp_init(ExrDSPContext *c)
{
    /* Baseline that works everywhere. */
    c->reorder_pixels = reorder_pixels_scalar;

    /* ARCH_X86 is expected to come from config.h; a plain if() (rather than
     * #if) keeps the call visible to the compiler on every target. */
    if (ARCH_X86) {
        ff_exrdsp_init_x86(c);
    }
}
|
32
libavcodec/exrdsp.h
Normal file
32
libavcodec/exrdsp.h
Normal file
@ -0,0 +1,32 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVCODEC_EXRDSP_H
|
||||
#define AVCODEC_EXRDSP_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "libavutil/common.h"
|
||||
|
||||
/**
 * Table of DSP routines used by the EXR decoder.
 * Populated by ff_exrdsp_init().
 */
typedef struct ExrDSPContext {
|
||||
/**
 * Interleave the two halves of src into dst:
 * dst[2*i] = src[i], dst[2*i+1] = src[size/2 + i].
 *
 * @param src  input buffer
 * @param dst  output buffer
 * @param size number of bytes to reorder
 *
 * NOTE(review): the SIMD versions process whole vectors per iteration,
 * so they appear to need padded buffers -- confirm against callers.
 */
void (*reorder_pixels)(uint8_t *src, uint8_t *dst, ptrdiff_t size);
|
||||
} ExrDSPContext;
|
||||
|
||||
void ff_exrdsp_init(ExrDSPContext *c);
|
||||
void ff_exrdsp_init_x86(ExrDSPContext *c);
|
||||
|
||||
#endif /* AVCODEC_EXRDSP_H */
|
@ -52,6 +52,7 @@ OBJS-$(CONFIG_APNG_DECODER) += x86/pngdsp_init.o
|
||||
OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp.o
|
||||
OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp_init.o x86/synth_filter_init.o
|
||||
OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc_init.o
|
||||
OBJS-$(CONFIG_EXR_DECODER) += x86/exrdsp_init.o
|
||||
OBJS-$(CONFIG_OPUS_DECODER) += x86/opus_dsp_init.o
|
||||
OBJS-$(CONFIG_OPUS_ENCODER) += x86/opus_dsp_init.o
|
||||
OBJS-$(CONFIG_HEVC_DECODER) += x86/hevcdsp_init.o
|
||||
@ -153,6 +154,7 @@ X86ASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o x86/synth_filter.o
|
||||
X86ASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp.o \
|
||||
x86/dirac_dwt.o
|
||||
X86ASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o
|
||||
X86ASM-OBJS-$(CONFIG_EXR_DECODER) += x86/exrdsp.o
|
||||
X86ASM-OBJS-$(CONFIG_FLAC_DECODER) += x86/flacdsp.o
|
||||
ifdef CONFIG_GPL
|
||||
X86ASM-OBJS-$(CONFIG_FLAC_ENCODER) += x86/flac_dsp_gpl.o
|
||||
|
63
libavcodec/x86/exrdsp.asm
Normal file
63
libavcodec/x86/exrdsp.asm
Normal file
@ -0,0 +1,63 @@
|
||||
;******************************************************************************
|
||||
;* X86 Optimized functions for Open Exr Decoder
|
||||
;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
|
||||
;*
|
||||
;* reorder_pixels based on patch by John Loy
|
||||
;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
SECTION .text
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; void ff_reorder_pixels(uint8_t *src, uint8_t *dst, ptrdiff_t size)
|
||||
;------------------------------------------------------------------------------
|
||||
|
||||
; Emits reorder_pixels for the instruction set selected by the preceding
; INIT_XMM / INIT_YMM. The routine interleaves the bytes of the first half of
; src with the bytes of the second half and stores the result to dst.
;
; All three pointers are moved to the end of their region and indexed with a
; negative offset that counts up to zero. The loop body always runs at least
; once and, per iteration, loads mmsize bytes from each half and stores
; 2*mmsize bytes, so accesses can extend past size.
; NOTE(review): callers presumably have to pad/align the buffers -- confirm.
%macro REORDER_PIXELS 0
|
||||
cglobal reorder_pixels, 3,4,3, src1, dst, size, src2 ; x86inc cglobal: 3 args, 4 GPRs, 3 vector regs
|
||||
lea src2q, [src1q+sizeq] ; src2 = src + size (= src + 2 * half_size for even size)
|
||||
add dstq, sizeq ; dst = dst + size (end of the output)
|
||||
shr sizeq, 1 ; size = half_size
|
||||
add src1q, sizeq ; src1 = src + half_size (end of the first half)
|
||||
neg sizeq ; size = -half_size: shared negative index for dst, src1, src2
|
||||
.loop:
|
||||
|
||||
%if cpuflag(avx2)
|
||||
vpermq m0, [src1q + sizeq], 0xd8; load first part, qwords reordered to 0,2,1,3
|
||||
vpermq m1, [src2q + sizeq], 0xd8; load second part, same qword reordering
|
||||
%else
|
||||
mova m0, [src1q+sizeq] ; load first part (mova: src is expected to be aligned)
|
||||
movu m1, [src2q+sizeq] ; load second part (movu: src + half_size may be unaligned)
|
||||
%endif
|
||||
SBUTTERFLY bw, 0, 1, 2 ; interleave the bytes of m0 and m1 (m2 is scratch)
|
||||
mova [dstq+2*sizeq ], m0 ; store low interleaved vector; dst index advances twice as fast
|
||||
mova [dstq+2*sizeq+mmsize], m1 ; store high interleaved vector
|
||||
add sizeq, mmsize ; advance by one vector per half
|
||||
jl .loop ; repeat while the index is still negative
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
REORDER_PIXELS
|
||||
|
||||
%if HAVE_AVX2_EXTERNAL
|
||||
INIT_YMM avx2
|
||||
REORDER_PIXELS
|
||||
%endif
|
39
libavcodec/x86/exrdsp_init.c
Normal file
39
libavcodec/x86/exrdsp_init.c
Normal file
@ -0,0 +1,39 @@
|
||||
/*
|
||||
* OpenEXR (.exr) image decoder
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavcodec/exrdsp.h"
|
||||
|
||||
void ff_reorder_pixels_sse2(uint8_t *src, uint8_t *dst, ptrdiff_t size);
|
||||
|
||||
void ff_reorder_pixels_avx2(uint8_t *src, uint8_t *dst, ptrdiff_t size);
|
||||
|
||||
/**
 * Install the x86 assembly versions of the EXR DSP routines that the running
 * CPU supports.
 *
 * The checks run from the older to the newer instruction set, so the last
 * matching assignment wins and AVX2 takes precedence over SSE2.
 *
 * @param dsp context whose function pointers may be overridden
 */
av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
{
    const int flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(flags))
        dsp->reorder_pixels = ff_reorder_pixels_sse2;

    if (EXTERNAL_AVX2_FAST(flags))
        dsp->reorder_pixels = ff_reorder_pixels_avx2;
}
|
Loading…
Reference in New Issue
Block a user