mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2024-11-27 21:40:34 +00:00
vf_colorspace: x86-64 SIMD (SSE2) optimizations.
This commit is contained in:
parent
2e2e08a35b
commit
5ce703a6bf
@ -128,4 +128,7 @@ void ff_colorspacedsp_init(ColorSpaceDSPContext *dsp)
|
||||
init_yuv2yuv_fns(2, 12);
|
||||
|
||||
dsp->multiply3x3 = multiply3x3_c;
|
||||
|
||||
if (ARCH_X86)
|
||||
ff_colorspacedsp_x86_init(dsp);
|
||||
}
|
||||
|
@ -48,4 +48,7 @@ typedef struct ColorSpaceDSPContext {
|
||||
|
||||
void ff_colorspacedsp_init(ColorSpaceDSPContext *dsp);
|
||||
|
||||
/* internal */
|
||||
void ff_colorspacedsp_x86_init(ColorSpaceDSPContext *dsp);
|
||||
|
||||
#endif /* AVFILTER_COLORSPACEDSP_H */
|
||||
|
@ -1,5 +1,6 @@
|
||||
OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
|
||||
OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
|
||||
OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
|
||||
OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o
|
||||
OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o
|
||||
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o
|
||||
@ -23,6 +24,7 @@ OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o
|
||||
|
||||
YASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
|
||||
YASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
|
||||
YASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
|
||||
YASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o
|
||||
YASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o
|
||||
YASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
|
||||
|
1097
libavfilter/x86/colorspacedsp.asm
Normal file
1097
libavfilter/x86/colorspacedsp.asm
Normal file
File diff suppressed because it is too large
Load Diff
119
libavfilter/x86/colorspacedsp_init.c
Normal file
119
libavfilter/x86/colorspacedsp_init.c
Normal file
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/x86/cpu.h"
|
||||
|
||||
#include "libavfilter/colorspacedsp.h"
|
||||
|
||||
/*
 * Prototype of one external SSE2 yuv2yuv kernel. "variant" is pasted into
 * the symbol name, e.g. decl_yuv2yuv_fn(420p8to10) declares
 * ff_yuv2yuv_420p8to10_sse2().
 */
#define decl_yuv2yuv_fn(variant)                                            \
void ff_yuv2yuv_##variant##_sse2(uint8_t *yuv_out[3],                       \
                                 ptrdiff_t yuv_out_stride[3],               \
                                 uint8_t *yuv_in[3],                        \
                                 ptrdiff_t yuv_in_stride[3],                \
                                 int w, int h,                              \
                                 const int16_t yuv2yuv_coeffs[3][3][8],     \
                                 const int16_t yuv_offset[2][8])

/*
 * Declare the nine input/output depth pairings (8, 10 and 12 bit on either
 * side) for one chroma layout; "layout" is the numeric prefix (420/422/444).
 */
#define decl_yuv2yuv_fns(layout)        \
    decl_yuv2yuv_fn(layout##p8to8);     \
    decl_yuv2yuv_fn(layout##p8to10);    \
    decl_yuv2yuv_fn(layout##p8to12);    \
    decl_yuv2yuv_fn(layout##p10to8);    \
    decl_yuv2yuv_fn(layout##p10to10);   \
    decl_yuv2yuv_fn(layout##p10to12);   \
    decl_yuv2yuv_fn(layout##p12to8);    \
    decl_yuv2yuv_fn(layout##p12to10);   \
    decl_yuv2yuv_fn(layout##p12to12)

decl_yuv2yuv_fns(420);
decl_yuv2yuv_fns(422);
decl_yuv2yuv_fns(444);
|
||||
|
||||
/*
 * Prototype of one external SSE2 yuv2rgb kernel; "variant" becomes part of
 * the symbol name, e.g. ff_yuv2rgb_420p8_sse2(). The output is three int16_t
 * planes sharing a single stride.
 */
#define decl_yuv2rgb_fn(variant)                                    \
void ff_yuv2rgb_##variant##_sse2(int16_t *rgb_out[3],               \
                                 ptrdiff_t rgb_stride,              \
                                 uint8_t *yuv_in[3],                \
                                 ptrdiff_t yuv_stride[3],           \
                                 int w, int h,                      \
                                 const int16_t coeff[3][3][8],      \
                                 const int16_t yuv_offset[8])

/* Declare the 8-, 10- and 12-bit input variants of one chroma layout. */
#define decl_yuv2rgb_fns(layout)    \
    decl_yuv2rgb_fn(layout##p8);    \
    decl_yuv2rgb_fn(layout##p10);   \
    decl_yuv2rgb_fn(layout##p12)

decl_yuv2rgb_fns(420);
decl_yuv2rgb_fns(422);
decl_yuv2rgb_fns(444);
|
||||
|
||||
/*
 * Prototype of one external SSE2 rgb2yuv kernel; "variant" becomes part of
 * the symbol name, e.g. ff_rgb2yuv_420p8_sse2(). The input is three int16_t
 * planes sharing a single stride.
 */
#define decl_rgb2yuv_fn(variant)                                    \
void ff_rgb2yuv_##variant##_sse2(uint8_t *yuv_out[3],               \
                                 ptrdiff_t yuv_stride[3],           \
                                 int16_t *rgb_in[3],                \
                                 ptrdiff_t rgb_stride,              \
                                 int w, int h,                      \
                                 const int16_t coeff[3][3][8],      \
                                 const int16_t yuv_offset[8])

/* Declare the 8-, 10- and 12-bit output variants of one chroma layout. */
#define decl_rgb2yuv_fns(layout)    \
    decl_rgb2yuv_fn(layout##p8);    \
    decl_rgb2yuv_fn(layout##p10);   \
    decl_rgb2yuv_fn(layout##p12)

decl_rgb2yuv_fns(420);
decl_rgb2yuv_fns(422);
decl_rgb2yuv_fns(444);
|
||||
|
||||
/*
 * External SSE2 kernel for the multiply3x3 slot of ColorSpaceDSPContext.
 * It takes three int16_t planes with a common stride and a 3x3 coefficient
 * matrix whose entries are each stored as 8 int16_t values.
 */
void ff_multiply3x3_sse2(int16_t *data[3],
                         ptrdiff_t stride,
                         int w, int h,
                         const int16_t coeff[3][3][8]);
|
||||
|
||||
/*
 * Store every SSE2 kernel of one chroma layout into slot "idx" of the
 * yuv2yuv, yuv2rgb and rgb2yuv tables of the local "dsp" pointer. The first
 * table index is the input depth and, for yuv2yuv, the second one is the
 * output depth (0/1/2 = 8/10/12 bit, as the pasted symbol names show).
 */
#define assign_sse2_fns(idx, ss)                                        \
    do {                                                                \
        dsp->yuv2yuv[0][0][idx] = ff_yuv2yuv_##ss##p8to8_sse2;          \
        dsp->yuv2yuv[0][1][idx] = ff_yuv2yuv_##ss##p8to10_sse2;         \
        dsp->yuv2yuv[0][2][idx] = ff_yuv2yuv_##ss##p8to12_sse2;         \
        dsp->yuv2yuv[1][0][idx] = ff_yuv2yuv_##ss##p10to8_sse2;         \
        dsp->yuv2yuv[1][1][idx] = ff_yuv2yuv_##ss##p10to10_sse2;        \
        dsp->yuv2yuv[1][2][idx] = ff_yuv2yuv_##ss##p10to12_sse2;        \
        dsp->yuv2yuv[2][0][idx] = ff_yuv2yuv_##ss##p12to8_sse2;         \
        dsp->yuv2yuv[2][1][idx] = ff_yuv2yuv_##ss##p12to10_sse2;        \
        dsp->yuv2yuv[2][2][idx] = ff_yuv2yuv_##ss##p12to12_sse2;        \
                                                                        \
        dsp->yuv2rgb[0][idx]    = ff_yuv2rgb_##ss##p8_sse2;             \
        dsp->yuv2rgb[1][idx]    = ff_yuv2rgb_##ss##p10_sse2;            \
        dsp->yuv2rgb[2][idx]    = ff_yuv2rgb_##ss##p12_sse2;            \
                                                                        \
        dsp->rgb2yuv[0][idx]    = ff_rgb2yuv_##ss##p8_sse2;             \
        dsp->rgb2yuv[1][idx]    = ff_rgb2yuv_##ss##p10_sse2;            \
        dsp->rgb2yuv[2][idx]    = ff_rgb2yuv_##ss##p12_sse2;            \
    } while (0)

/*
 * Install the x86 SSE2 kernels into dsp.
 *
 * The pointers are only overwritten when building for x86-64 and
 * EXTERNAL_SSE2() accepts the CPU flags reported at runtime; otherwise dsp
 * is left exactly as the caller set it up.
 */
void ff_colorspacedsp_x86_init(ColorSpaceDSPContext *dsp)
{
    int cpu_flags = av_get_cpu_flags();

    if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) {
        /* layout slots: 0 = 444, 1 = 422, 2 = 420 */
        assign_sse2_fns(2, 420);
        assign_sse2_fns(1, 422);
        assign_sse2_fns(0, 444);

        dsp->multiply3x3 = ff_multiply3x3_sse2;
    }
}
|
@ -16,6 +16,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes)
|
||||
|
||||
# libavfilter tests
|
||||
AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
|
||||
AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
|
||||
|
||||
CHECKASMOBJS-$(CONFIG_AVFILTER) += $(AVFILTEROBJS-yes)
|
||||
|
||||
|
@ -106,6 +106,9 @@ static const struct {
|
||||
#if CONFIG_BLEND_FILTER
|
||||
{ "vf_blend", checkasm_check_blend },
|
||||
#endif
|
||||
#if CONFIG_COLORSPACE_FILTER
|
||||
{ "vf_colorspace", checkasm_check_colorspace },
|
||||
#endif
|
||||
#endif
|
||||
{ NULL }
|
||||
};
|
||||
|
@ -33,6 +33,7 @@
|
||||
void checkasm_check_alacdsp(void);
|
||||
void checkasm_check_blend(void);
|
||||
void checkasm_check_bswapdsp(void);
|
||||
void checkasm_check_colorspace(void);
|
||||
void checkasm_check_flacdsp(void);
|
||||
void checkasm_check_fmtconvert(void);
|
||||
void checkasm_check_h264pred(void);
|
||||
|
314
tests/checkasm/vf_colorspace.c
Normal file
314
tests/checkasm/vf_colorspace.c
Normal file
@ -0,0 +1,314 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "checkasm.h"
|
||||
#include "libavfilter/colorspacedsp.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
|
||||
/* Dimensions, in pixels, of the test picture handed to every kernel. */
#define W 64
#define H 64

/*
 * Fill the three planes of the caller's "src" with random samples, 32 bits
 * at a time. Relies on these names from the expansion site: src, idepth,
 * ss_w and ss_h. Each random word is masked with bpp_mask[idepth]; samples
 * take one byte when idepth is 0 and two bytes otherwise, and the chroma
 * planes (1 and 2) are shrunk by ss_w + ss_h bits.
 */
#define randomize_buffers()                                             \
    do {                                                                \
        const unsigned sample_mask  = bpp_mask[idepth];                 \
        const int bytes_per_sample  = idepth ? 2 : 1;                   \
        const int luma_bytes        = W * H * bytes_per_sample;         \
        int plane, pos;                                                 \
                                                                        \
        for (plane = 0; plane < 3; plane++) {                           \
            int plane_bytes = luma_bytes;                               \
                                                                        \
            if (plane)                                                  \
                plane_bytes >>= ss_w + ss_h;                            \
            for (pos = 0; pos < plane_bytes; pos += 4) {                \
                unsigned word = rnd() & sample_mask;                    \
                AV_WN32A(&src[plane][pos], word);                       \
            }                                                           \
        }                                                               \
    } while (0)
|
||||
|
||||
/* Chroma layout names, indexed by the "fmt" loop variable of the checks. */
static const char *format_string[] = {
    "444",
    "422",
    "420",
};

/*
 * AND-mask applied to every random 32-bit word, indexed by input depth:
 * all bits kept, then the low 10 and the low 12 bits of each 16-bit half.
 */
static unsigned bpp_mask[] = {
    0xffffffff,
    0x03ff03ff,
    0x0fff0fff,
};
|
||||
|
||||
static void check_yuv2yuv(void)
|
||||
{
|
||||
declare_func(void, uint8_t *dst[3], ptrdiff_t dst_stride[3],
|
||||
uint8_t *src[3], ptrdiff_t src_stride[3],
|
||||
int w, int h, const int16_t coeff[3][3][8],
|
||||
const int16_t off[2][8]);
|
||||
ColorSpaceDSPContext dsp;
|
||||
int idepth, odepth, fmt, n;
|
||||
LOCAL_ALIGNED_32(uint8_t, src_y, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, src_u, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, src_v, [W * H * 2]);
|
||||
uint8_t *src[3] = { src_y, src_u, src_v };
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0_y, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0_u, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0_v, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1_y, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1_u, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1_v, [W * H * 2]);
|
||||
uint8_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
|
||||
LOCAL_ALIGNED_32(int16_t, offset_buf, [16]);
|
||||
LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
|
||||
int16_t (*offset)[8] = (int16_t(*)[8]) offset_buf;
|
||||
int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
|
||||
|
||||
ff_colorspacedsp_init(&dsp);
|
||||
for (n = 0; n < 8; n++) {
|
||||
offset[0][n] = offset[1][n] = 16;
|
||||
|
||||
coeff[0][0][n] = (1 << 14) + (1 << 7) + 1;
|
||||
coeff[0][1][n] = (1 << 7) - 1;
|
||||
coeff[0][2][n] = -(1 << 8);
|
||||
coeff[1][0][n] = coeff[2][0][n] = 0;
|
||||
coeff[1][1][n] = (1 << 14) + (1 << 7);
|
||||
coeff[1][2][n] = -(1 << 7);
|
||||
coeff[2][2][n] = (1 << 14) - (1 << 6);
|
||||
coeff[2][1][n] = 1 << 6;
|
||||
}
|
||||
for (idepth = 0; idepth < 3; idepth++) {
|
||||
for (odepth = 0; odepth < 3; odepth++) {
|
||||
for (fmt = 0; fmt < 3; fmt++) {
|
||||
if (check_func(dsp.yuv2yuv[idepth][odepth][fmt],
|
||||
"ff_colorspacedsp_yuv2yuv_%sp%dto%d",
|
||||
format_string[fmt],
|
||||
idepth * 2 + 8, odepth * 2 + 8)) {
|
||||
int ss_w = !!fmt, ss_h = fmt == 2;
|
||||
int y_src_stride = W << !!idepth, y_dst_stride = W << !!odepth;
|
||||
int uv_src_stride = y_src_stride >> ss_w, uv_dst_stride = y_dst_stride >> ss_w;
|
||||
|
||||
randomize_buffers();
|
||||
call_ref(dst0, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
|
||||
src, (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
|
||||
W, H, coeff, offset);
|
||||
call_new(dst1, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
|
||||
src, (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
|
||||
W, H, coeff, offset);
|
||||
if (memcmp(dst0[0], dst1[0], y_dst_stride * H) ||
|
||||
memcmp(dst0[1], dst1[1], uv_dst_stride * H >> ss_h) ||
|
||||
memcmp(dst0[2], dst1[2], uv_dst_stride * H >> ss_h)) {
|
||||
fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
report("yuv2yuv");
|
||||
}
|
||||
|
||||
static void check_yuv2rgb(void)
|
||||
{
|
||||
declare_func(void, int16_t *dst[3], ptrdiff_t dst_stride,
|
||||
uint8_t *src[3], ptrdiff_t src_stride[3],
|
||||
int w, int h, const int16_t coeff[3][3][8],
|
||||
const int16_t off[8]);
|
||||
ColorSpaceDSPContext dsp;
|
||||
int idepth, fmt, n;
|
||||
LOCAL_ALIGNED_32(uint8_t, src_y, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, src_u, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(uint8_t, src_v, [W * H * 2]);
|
||||
uint8_t *src[3] = { src_y, src_u, src_v };
|
||||
LOCAL_ALIGNED_32(int16_t, dst0_y, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst0_u, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst0_v, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst1_y, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst1_u, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst1_v, [W * H]);
|
||||
int16_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
|
||||
LOCAL_ALIGNED_32(int16_t, offset, [8]);
|
||||
LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
|
||||
int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
|
||||
|
||||
ff_colorspacedsp_init(&dsp);
|
||||
for (n = 0; n < 8; n++) {
|
||||
offset[n] = 16;
|
||||
|
||||
coeff[0][0][n] = coeff[1][0][n] = coeff[2][0][n] = (1 << 14) | 1;
|
||||
coeff[0][1][n] = coeff[2][2][n] = 0;
|
||||
coeff[0][2][n] = 1 << 13;
|
||||
coeff[1][1][n] = -(1 << 12);
|
||||
coeff[1][2][n] = 1 << 12;
|
||||
coeff[2][1][n] = 1 << 11;
|
||||
}
|
||||
for (idepth = 0; idepth < 3; idepth++) {
|
||||
for (fmt = 0; fmt < 3; fmt++) {
|
||||
if (check_func(dsp.yuv2rgb[idepth][fmt],
|
||||
"ff_colorspacedsp_yuv2rgb_%sp%d",
|
||||
format_string[fmt], idepth * 2 + 8)) {
|
||||
int ss_w = !!fmt, ss_h = fmt == 2;
|
||||
int y_src_stride = W << !!idepth;
|
||||
int uv_src_stride = y_src_stride >> ss_w;
|
||||
|
||||
randomize_buffers();
|
||||
call_ref(dst0, W, src,
|
||||
(ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
|
||||
W, H, coeff, offset);
|
||||
call_new(dst1, W, src,
|
||||
(ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
|
||||
W, H, coeff, offset);
|
||||
if (memcmp(dst0[0], dst1[0], W * H * sizeof(int16_t)) ||
|
||||
memcmp(dst0[1], dst1[1], W * H * sizeof(int16_t)) ||
|
||||
memcmp(dst0[2], dst1[2], W * H * sizeof(int16_t))) {
|
||||
fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
report("yuv2rgb");
|
||||
}
|
||||
|
||||
#undef randomize_buffers
/*
 * Second flavour of randomize_buffers(), used from here on: fill the three
 * W * H planes of the caller's "src" (indexable as src[plane][i]) with
 * random values. Each value is 15 random bits lowered by
 * (32768 - 28672) >> 1, i.e. it lies in [-2048, 30719].
 */
#define randomize_buffers()                                             \
    do {                                                                \
        int plane, idx;                                                 \
                                                                        \
        for (plane = 0; plane < 3; plane++) {                           \
            for (idx = 0; idx < W * H; idx++) {                         \
                int sample = rnd() & 0x7fff;                            \
                sample -= (32768 - 28672) >> 1;                         \
                src[plane][idx] = sample;                               \
            }                                                           \
        }                                                               \
    } while (0)
|
||||
|
||||
static void check_rgb2yuv(void)
|
||||
{
|
||||
declare_func(void, uint8_t *dst[3], ptrdiff_t dst_stride[3],
|
||||
int16_t *src[3], ptrdiff_t src_stride,
|
||||
int w, int h, const int16_t coeff[3][3][8],
|
||||
const int16_t off[8]);
|
||||
ColorSpaceDSPContext dsp;
|
||||
int odepth, fmt, n;
|
||||
LOCAL_ALIGNED_32(int16_t, src_y, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(int16_t, src_u, [W * H * 2]);
|
||||
LOCAL_ALIGNED_32(int16_t, src_v, [W * H * 2]);
|
||||
int16_t *src[3] = { src_y, src_u, src_v };
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0_y, [W * H]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0_u, [W * H]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0_v, [W * H]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1_y, [W * H]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1_u, [W * H]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1_v, [W * H]);
|
||||
uint8_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
|
||||
LOCAL_ALIGNED_32(int16_t, offset, [8]);
|
||||
LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
|
||||
int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
|
||||
|
||||
ff_colorspacedsp_init(&dsp);
|
||||
for (n = 0; n < 8; n++) {
|
||||
offset[n] = 16;
|
||||
|
||||
// these somewhat resemble bt601/smpte170m coefficients
|
||||
coeff[0][0][n] = lrint(0.3 * (1 << 14));
|
||||
coeff[0][1][n] = lrint(0.6 * (1 << 14));
|
||||
coeff[0][2][n] = lrint(0.1 * (1 << 14));
|
||||
coeff[1][0][n] = lrint(-0.15 * (1 << 14));
|
||||
coeff[1][1][n] = lrint(-0.35 * (1 << 14));
|
||||
coeff[1][2][n] = lrint(0.5 * (1 << 14));
|
||||
coeff[2][0][n] = lrint(0.5 * (1 << 14));
|
||||
coeff[2][1][n] = lrint(-0.42 * (1 << 14));
|
||||
coeff[2][2][n] = lrint(-0.08 * (1 << 14));
|
||||
}
|
||||
for (odepth = 0; odepth < 3; odepth++) {
|
||||
for (fmt = 0; fmt < 3; fmt++) {
|
||||
if (check_func(dsp.rgb2yuv[odepth][fmt],
|
||||
"ff_colorspacedsp_rgb2yuv_%sp%d",
|
||||
format_string[fmt], odepth * 2 + 8)) {
|
||||
int ss_w = !!fmt, ss_h = fmt == 2;
|
||||
int y_dst_stride = W << !!odepth;
|
||||
int uv_dst_stride = y_dst_stride >> ss_w;
|
||||
|
||||
randomize_buffers();
|
||||
call_ref(dst0, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
|
||||
src, W, W, H, coeff, offset);
|
||||
call_new(dst1, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
|
||||
src, W, W, H, coeff, offset);
|
||||
if (memcmp(dst0[0], dst1[0], H * y_dst_stride) ||
|
||||
memcmp(dst0[1], dst1[1], H * uv_dst_stride >> ss_h) ||
|
||||
memcmp(dst0[2], dst1[2], H * uv_dst_stride >> ss_h)) {
|
||||
fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
report("rgb2yuv");
|
||||
}
|
||||
|
||||
static void check_multiply3x3(void)
|
||||
{
|
||||
declare_func(void, int16_t *data[3], ptrdiff_t stride,
|
||||
int w, int h, const int16_t coeff[3][3][8]);
|
||||
ColorSpaceDSPContext dsp;
|
||||
LOCAL_ALIGNED_32(int16_t, dst0_y, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst0_u, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst0_v, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst1_y, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst1_u, [W * H]);
|
||||
LOCAL_ALIGNED_32(int16_t, dst1_v, [W * H]);
|
||||
int16_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
|
||||
int16_t **src = dst0;
|
||||
LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
|
||||
int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
|
||||
int n;
|
||||
|
||||
ff_colorspacedsp_init(&dsp);
|
||||
for (n = 0; n < 8; n++) {
|
||||
coeff[0][0][n] = lrint(0.85 * (1 << 14));
|
||||
coeff[0][1][n] = lrint(0.10 * (1 << 14));
|
||||
coeff[0][2][n] = lrint(0.05 * (1 << 14));
|
||||
coeff[1][0][n] = lrint(-0.1 * (1 << 14));
|
||||
coeff[1][1][n] = lrint(0.95 * (1 << 14));
|
||||
coeff[1][2][n] = lrint(0.15 * (1 << 14));
|
||||
coeff[2][0][n] = lrint(-0.2 * (1 << 14));
|
||||
coeff[2][1][n] = lrint(0.30 * (1 << 14));
|
||||
coeff[2][2][n] = lrint(0.90 * (1 << 14));
|
||||
}
|
||||
if (check_func(dsp.multiply3x3, "ff_colorspacedsp_multiply3x3")) {
|
||||
randomize_buffers();
|
||||
memcpy(dst1_y, dst0_y, W * H * sizeof(*dst1_y));
|
||||
memcpy(dst1_u, dst0_u, W * H * sizeof(*dst1_u));
|
||||
memcpy(dst1_v, dst0_v, W * H * sizeof(*dst1_v));
|
||||
call_ref(dst0, W, W, H, coeff);
|
||||
call_new(dst1, W, W, H, coeff);
|
||||
if (memcmp(dst0[0], dst1[0], H * W * sizeof(*dst0_y)) ||
|
||||
memcmp(dst0[1], dst1[1], H * W * sizeof(*dst0_u)) ||
|
||||
memcmp(dst0[2], dst1[2], H * W * sizeof(*dst0_v))) {
|
||||
fail();
|
||||
}
|
||||
}
|
||||
|
||||
report("multiply3x3");
|
||||
}
|
||||
|
||||
void checkasm_check_colorspace(void)
|
||||
{
|
||||
check_yuv2yuv();
|
||||
check_yuv2rgb();
|
||||
check_rgb2yuv();
|
||||
check_multiply3x3();
|
||||
}
|
Loading…
Reference in New Issue
Block a user