From bbfcb1b7c891319a3eb4420c144ddae471942631 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Sun, 12 Nov 2017 19:11:51 +0100 Subject: [PATCH] avfilter/vf_threshold: add x86 SIMD Signed-off-by: Paul B Mahol --- libavfilter/threshold.h | 51 +++++++++++++++++++++ libavfilter/vf_threshold.c | 28 +++--------- libavfilter/x86/Makefile | 2 + libavfilter/x86/vf_threshold.asm | 69 +++++++++++++++++++++++++++++ libavfilter/x86/vf_threshold_init.c | 41 +++++++++++++++++ 5 files changed, 169 insertions(+), 22 deletions(-) create mode 100644 libavfilter/threshold.h create mode 100644 libavfilter/x86/vf_threshold.asm create mode 100644 libavfilter/x86/vf_threshold_init.c diff --git a/libavfilter/threshold.h b/libavfilter/threshold.h new file mode 100644 index 0000000000..8b55ad6ba1 --- /dev/null +++ b/libavfilter/threshold.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_THRESHOLD_H +#define AVFILTER_THRESHOLD_H + +#include "avfilter.h" +#include "framesync.h" + +typedef struct ThresholdContext { + const AVClass *class; + + int depth; + int planes; + int bpc; + + int nb_planes; + int width[4], height[4]; + + void (*threshold)(const uint8_t *in, const uint8_t *threshold, + const uint8_t *min, const uint8_t *max, + uint8_t *out, + ptrdiff_t ilinesize, ptrdiff_t tlinesize, + ptrdiff_t flinesize, ptrdiff_t slinesize, + ptrdiff_t olinesize, + int w, int h); + + AVFrame *frames[4]; + FFFrameSync fs; +} ThresholdContext; + +void ff_threshold_init_x86(ThresholdContext *s); + +#endif /* AVFILTER_THRESHOLD_H */ diff --git a/libavfilter/vf_threshold.c b/libavfilter/vf_threshold.c index 76c3ddb892..4183b353d2 100644 --- a/libavfilter/vf_threshold.c +++ b/libavfilter/vf_threshold.c @@ -31,27 +31,7 @@ #include "framesync.h" #include "internal.h" #include "video.h" - -typedef struct ThresholdContext { - const AVClass *class; - - int planes; - int bpc; - - int nb_planes; - int width[4], height[4]; - - void (*threshold)(const uint8_t *in, const uint8_t *threshold, - const uint8_t *min, const uint8_t *max, - uint8_t *out, - ptrdiff_t ilinesize, ptrdiff_t tlinesize, - ptrdiff_t flinesize, ptrdiff_t slinesize, - ptrdiff_t olinesize, - int w, int h); - - AVFrame *frames[4]; - FFFrameSync fs; -} ThresholdContext; +#include "threshold.h" #define OFFSET(x) offsetof(ThresholdContext, x) #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM @@ -203,8 +183,9 @@ static int config_input(AVFilterLink *inlink) s->height[0] = s->height[3] = inlink->h; s->width[1] = s->width[2] = AV_CEIL_RSHIFT(inlink->w, hsub); s->width[0] = s->width[3] = inlink->w; + s->depth = desc->comp[0].depth; - if (desc->comp[0].depth == 8) { + if (s->depth == 8) { s->threshold = threshold8; s->bpc = 1; } else { @@ -212,6 +193,9 @@ static int config_input(AVFilterLink *inlink) s->bpc = 2; } + if (ARCH_X86) + ff_threshold_init_x86(s); + return 0; } diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile index 3431625883..c10f4d5538 100644 --- a/libavfilter/x86/Makefile +++ b/libavfilter/x86/Makefile @@ -20,6 +20,7 @@ OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o OBJS-$(CONFIG_SSIM_FILTER) += x86/vf_ssim_init.o OBJS-$(CONFIG_STEREO3D_FILTER) += x86/vf_stereo3d_init.o OBJS-$(CONFIG_TBLEND_FILTER) += x86/vf_blend_init.o +OBJS-$(CONFIG_THRESHOLD_FILTER) += x86/vf_threshold_init.o OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_tinterlace_init.o OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o OBJS-$(CONFIG_W3FDIF_FILTER) += x86/vf_w3fdif_init.o @@ -46,6 +47,7 @@ X86ASM-OBJS-$(CONFIG_SHOWCQT_FILTER) += x86/avf_showcqt.o X86ASM-OBJS-$(CONFIG_SSIM_FILTER) += x86/vf_ssim.o X86ASM-OBJS-$(CONFIG_STEREO3D_FILTER) += x86/vf_stereo3d.o X86ASM-OBJS-$(CONFIG_TBLEND_FILTER) += x86/vf_blend.o +X86ASM-OBJS-$(CONFIG_THRESHOLD_FILTER) += x86/vf_threshold.o X86ASM-OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_interlace.o X86ASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o X86ASM-OBJS-$(CONFIG_W3FDIF_FILTER) += x86/vf_w3fdif.o diff --git a/libavfilter/x86/vf_threshold.asm b/libavfilter/x86/vf_threshold.asm new file mode 100644 index 0000000000..9b0bfd8161 --- /dev/null +++ b/libavfilter/x86/vf_threshold.asm @@ -0,0 +1,69 @@ +;***************************************************************************** +;* x86-optimized functions for threshold filter +;* +;* Copyright (C) 2017 Paul B Mahol +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;***************************************************************************** + +%include "libavutil/x86/x86util.asm" + +%if ARCH_X86_64 + +SECTION_RODATA + +pb_128: times 16 db 128 + +SECTION .text + +INIT_XMM sse4 +cglobal threshold8, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x + mov wd, dword wm + mov hd, dword hm + mova m4, [pb_128] + add inq, wq + add thresholdq, wq + add minq, wq + add maxq, wq + add outq, wq + neg wq +.nextrow: + mov xq, wq + + .loop: + movu m1, [inq + xq] + movu m0, [thresholdq + xq] + movu m2, [minq + xq] + movu m3, [maxq + xq] + pxor m0, m4 + pxor m1, m4 + pcmpgtb m0, m1 + pblendvb m3, m2, m0 + movu [outq + xq], m3 + add xq, mmsize + jl .loop + + add inq, ilinesizeq + add thresholdq, tlinesizeq + add minq, flinesizeq + add maxq, slinesizeq + add outq, olinesizeq + sub hd, 1 + jg .nextrow +RET + +%endif diff --git a/libavfilter/x86/vf_threshold_init.c b/libavfilter/x86/vf_threshold_init.c new file mode 100644 index 0000000000..e2bbae11d5 --- /dev/null +++ b/libavfilter/x86/vf_threshold_init.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2015 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/cpu.h" +#include "libavfilter/threshold.h" + +void ff_threshold8_sse4(const uint8_t *in, const uint8_t *threshold, + const uint8_t *min, const uint8_t *max, + uint8_t *out, + ptrdiff_t ilinesize, ptrdiff_t tlinesize, + ptrdiff_t flinesize, ptrdiff_t slinesize, + ptrdiff_t olinesize, + int w, int h); + +av_cold void ff_threshold_init_x86(ThresholdContext *s) +{ + int cpu_flags = av_get_cpu_flags(); + + if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags) && s->depth == 8) { + s->threshold = ff_threshold8_sse4; + } +}