mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2024-11-24 03:59:43 +00:00
yadif: restore speed of the C filtering code
Always use the special filter for the first and last 3 columns (only).
Changes made in 64ed397 slowed the filter to just under 3/4 of its previous speed.
This commit restores the speed while maintaining identical output.
For reference, on my Athlon64:
1733222 decicycles in old (before 64ed397)
2358563 decicycles in new (after 64ed397)
1727558 decicycles in this (with this commit applied)
Signed-off-by: Anton Khirnov <anton@khirnov.net>
This commit is contained in:
parent
252c0bfdc0
commit
b0ef0ae776
@ -33,14 +33,17 @@
|
|||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
#define CHECK(j)\
|
#define CHECK(j)\
|
||||||
{ int score = FFABS(cur[mrefs + off_left + (j)] - cur[prefs + off_left - (j)])\
|
{ int score = FFABS(cur[mrefs - 1 + (j)] - cur[prefs - 1 - (j)])\
|
||||||
+ FFABS(cur[mrefs +(j)] - cur[prefs -(j)])\
|
+ FFABS(cur[mrefs +(j)] - cur[prefs -(j)])\
|
||||||
+ FFABS(cur[mrefs + off_right + (j)] - cur[prefs + off_right - (j)]);\
|
+ FFABS(cur[mrefs + 1 + (j)] - cur[prefs + 1 - (j)]);\
|
||||||
if (score < spatial_score) {\
|
if (score < spatial_score) {\
|
||||||
spatial_score= score;\
|
spatial_score= score;\
|
||||||
spatial_pred= (cur[mrefs +(j)] + cur[prefs -(j)])>>1;\
|
spatial_pred= (cur[mrefs +(j)] + cur[prefs -(j)])>>1;\
|
||||||
|
|
||||||
#define FILTER(start, end) \
|
/* The is_not_edge argument here controls when the code will enter a branch
|
||||||
|
* which reads up to and including x-3 and x+3. */
|
||||||
|
|
||||||
|
#define FILTER(start, end, is_not_edge) \
|
||||||
for (x = start; x < end; x++) { \
|
for (x = start; x < end; x++) { \
|
||||||
int c = cur[mrefs]; \
|
int c = cur[mrefs]; \
|
||||||
int d = (prev2[0] + next2[0])>>1; \
|
int d = (prev2[0] + next2[0])>>1; \
|
||||||
@ -50,12 +53,10 @@
|
|||||||
int temporal_diff2 =(FFABS(next[mrefs] - c) + FFABS(next[prefs] - e) )>>1; \
|
int temporal_diff2 =(FFABS(next[mrefs] - c) + FFABS(next[prefs] - e) )>>1; \
|
||||||
int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); \
|
int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); \
|
||||||
int spatial_pred = (c+e) >> 1; \
|
int spatial_pred = (c+e) >> 1; \
|
||||||
int off_right = (x < w - 1) ? 1 : -1;\
|
|
||||||
int off_left = x ? -1 : 1;\
|
|
||||||
int spatial_score = FFABS(cur[mrefs + off_left] - cur[prefs + off_left]) + FFABS(c-e) \
|
|
||||||
+ FFABS(cur[mrefs + off_right] - cur[prefs + off_right]) - 1; \
|
|
||||||
\
|
\
|
||||||
if (x > 2 && x < w - 3) {\
|
if (is_not_edge) {\
|
||||||
|
int spatial_score = FFABS(cur[mrefs - 1] - cur[prefs - 1]) + FFABS(c-e) \
|
||||||
|
+ FFABS(cur[mrefs + 1] - cur[prefs + 1]) - 1; \
|
||||||
CHECK(-1) CHECK(-2) }} }} \
|
CHECK(-1) CHECK(-2) }} }} \
|
||||||
CHECK( 1) CHECK( 2) }} }} \
|
CHECK( 1) CHECK( 2) }} }} \
|
||||||
}\
|
}\
|
||||||
@ -96,12 +97,15 @@ static void filter_line_c(void *dst1,
|
|||||||
uint8_t *prev2 = parity ? prev : cur ;
|
uint8_t *prev2 = parity ? prev : cur ;
|
||||||
uint8_t *next2 = parity ? cur : next;
|
uint8_t *next2 = parity ? cur : next;
|
||||||
|
|
||||||
FILTER(0, w)
|
/* The function is called with the pointers already pointing to data[3] and
|
||||||
|
* with 6 subtracted from the width. This allows the FILTER macro to be
|
||||||
|
* called so that it processes all the pixels normally. A constant value of
|
||||||
|
* true for is_not_edge lets the compiler ignore the if statement. */
|
||||||
|
FILTER(0, w, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void filter_edges(void *dst1, void *prev1, void *cur1, void *next1,
|
static void filter_edges(void *dst1, void *prev1, void *cur1, void *next1,
|
||||||
int w, int prefs, int mrefs, int parity, int mode,
|
int w, int prefs, int mrefs, int parity, int mode)
|
||||||
int l_edge)
|
|
||||||
{
|
{
|
||||||
uint8_t *dst = dst1;
|
uint8_t *dst = dst1;
|
||||||
uint8_t *prev = prev1;
|
uint8_t *prev = prev1;
|
||||||
@ -111,7 +115,9 @@ static void filter_edges(void *dst1, void *prev1, void *cur1, void *next1,
|
|||||||
uint8_t *prev2 = parity ? prev : cur ;
|
uint8_t *prev2 = parity ? prev : cur ;
|
||||||
uint8_t *next2 = parity ? cur : next;
|
uint8_t *next2 = parity ? cur : next;
|
||||||
|
|
||||||
FILTER(0, l_edge)
|
/* Only edge pixels need to be processed here. A constant value of false
|
||||||
|
* for is_not_edge should let the compiler ignore the whole branch. */
|
||||||
|
FILTER(0, 3, 0)
|
||||||
|
|
||||||
dst = (uint8_t*)dst1 + w - 3;
|
dst = (uint8_t*)dst1 + w - 3;
|
||||||
prev = (uint8_t*)prev1 + w - 3;
|
prev = (uint8_t*)prev1 + w - 3;
|
||||||
@ -120,7 +126,7 @@ static void filter_edges(void *dst1, void *prev1, void *cur1, void *next1,
|
|||||||
prev2 = (uint8_t*)(parity ? prev : cur);
|
prev2 = (uint8_t*)(parity ? prev : cur);
|
||||||
next2 = (uint8_t*)(parity ? cur : next);
|
next2 = (uint8_t*)(parity ? cur : next);
|
||||||
|
|
||||||
FILTER(w - 3, w)
|
FILTER(w - 3, w, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -139,12 +145,11 @@ static void filter_line_c_16bit(void *dst1,
|
|||||||
mrefs /= 2;
|
mrefs /= 2;
|
||||||
prefs /= 2;
|
prefs /= 2;
|
||||||
|
|
||||||
FILTER(0, w)
|
FILTER(0, w, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void *next1,
|
static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void *next1,
|
||||||
int w, int prefs, int mrefs, int parity, int mode,
|
int w, int prefs, int mrefs, int parity, int mode)
|
||||||
int l_edge)
|
|
||||||
{
|
{
|
||||||
uint16_t *dst = dst1;
|
uint16_t *dst = dst1;
|
||||||
uint16_t *prev = prev1;
|
uint16_t *prev = prev1;
|
||||||
@ -154,7 +159,7 @@ static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void *next1,
|
|||||||
uint16_t *prev2 = parity ? prev : cur ;
|
uint16_t *prev2 = parity ? prev : cur ;
|
||||||
uint16_t *next2 = parity ? cur : next;
|
uint16_t *next2 = parity ? cur : next;
|
||||||
|
|
||||||
FILTER(0, l_edge)
|
FILTER(0, 3, 0)
|
||||||
|
|
||||||
dst = (uint16_t*)dst1 + w - 3;
|
dst = (uint16_t*)dst1 + w - 3;
|
||||||
prev = (uint16_t*)prev1 + w - 3;
|
prev = (uint16_t*)prev1 + w - 3;
|
||||||
@ -163,7 +168,7 @@ static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void *next1,
|
|||||||
prev2 = (uint16_t*)(parity ? prev : cur);
|
prev2 = (uint16_t*)(parity ? prev : cur);
|
||||||
next2 = (uint16_t*)(parity ? cur : next);
|
next2 = (uint16_t*)(parity ? cur : next);
|
||||||
|
|
||||||
FILTER(w - 3, w)
|
FILTER(w - 3, w, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void filter(AVFilterContext *ctx, AVFrame *dstpic,
|
static void filter(AVFilterContext *ctx, AVFrame *dstpic,
|
||||||
@ -177,7 +182,7 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic,
|
|||||||
int h = dstpic->height;
|
int h = dstpic->height;
|
||||||
int refs = yadif->cur->linesize[i];
|
int refs = yadif->cur->linesize[i];
|
||||||
int df = (yadif->csp->comp[i].depth_minus1 + 8) / 8;
|
int df = (yadif->csp->comp[i].depth_minus1 + 8) / 8;
|
||||||
int l_edge, l_edge_pix;
|
int pix_3 = 3 * df;
|
||||||
|
|
||||||
if (i == 1 || i == 2) {
|
if (i == 1 || i == 2) {
|
||||||
/* Why is this not part of the per-plane description thing? */
|
/* Why is this not part of the per-plane description thing? */
|
||||||
@ -188,8 +193,6 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic,
|
|||||||
/* filtering reads 3 pixels to the left/right; to avoid invalid reads,
|
/* filtering reads 3 pixels to the left/right; to avoid invalid reads,
|
||||||
* we need to call the c variant which avoids this for border pixels
|
* we need to call the c variant which avoids this for border pixels
|
||||||
*/
|
*/
|
||||||
l_edge = yadif->req_align;
|
|
||||||
l_edge_pix = l_edge / df;
|
|
||||||
|
|
||||||
for (y = 0; y < h; y++) {
|
for (y = 0; y < h; y++) {
|
||||||
if ((y ^ parity) & 1) {
|
if ((y ^ parity) & 1) {
|
||||||
@ -198,22 +201,15 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic,
|
|||||||
uint8_t *next = &yadif->next->data[i][y * refs];
|
uint8_t *next = &yadif->next->data[i][y * refs];
|
||||||
uint8_t *dst = &dstpic->data[i][y * dstpic->linesize[i]];
|
uint8_t *dst = &dstpic->data[i][y * dstpic->linesize[i]];
|
||||||
int mode = y == 1 || y + 2 == h ? 2 : yadif->mode;
|
int mode = y == 1 || y + 2 == h ? 2 : yadif->mode;
|
||||||
if (yadif->req_align) {
|
yadif->filter_line(dst + pix_3, prev + pix_3, cur + pix_3,
|
||||||
yadif->filter_line(dst + l_edge, prev + l_edge, cur + l_edge,
|
next + pix_3, w - 6,
|
||||||
next + l_edge, w - l_edge_pix - 3,
|
y + 1 < h ? refs : -refs,
|
||||||
y + 1 < h ? refs : -refs,
|
y ? -refs : refs,
|
||||||
y ? -refs : refs,
|
parity ^ tff, mode);
|
||||||
parity ^ tff, mode);
|
yadif->filter_edges(dst, prev, cur, next, w,
|
||||||
yadif->filter_edges(dst, prev, cur, next, w,
|
y + 1 < h ? refs : -refs,
|
||||||
y + 1 < h ? refs : -refs,
|
y ? -refs : refs,
|
||||||
y ? -refs : refs,
|
parity ^ tff, mode);
|
||||||
parity ^ tff, mode, l_edge_pix);
|
|
||||||
} else {
|
|
||||||
yadif->filter_line(dst, prev, cur, next + l_edge, w,
|
|
||||||
y + 1 < h ? refs : -refs,
|
|
||||||
y ? -refs : refs,
|
|
||||||
parity ^ tff, mode);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
memcpy(&dstpic->data[i][y * dstpic->linesize[i]],
|
memcpy(&dstpic->data[i][y * dstpic->linesize[i]],
|
||||||
&yadif->cur->data[i][y * refs], w * df);
|
&yadif->cur->data[i][y * refs], w * df);
|
||||||
|
@ -42,18 +42,12 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
|
|||||||
int cpu_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
if (EXTERNAL_MMXEXT(cpu_flags))
|
||||||
yadif->filter_line = ff_yadif_filter_line_mmxext;
|
yadif->filter_line = ff_yadif_filter_line_mmxext;
|
||||||
yadif->req_align = 8;
|
|
||||||
}
|
|
||||||
#endif /* ARCH_X86_32 */
|
#endif /* ARCH_X86_32 */
|
||||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags))
|
||||||
yadif->filter_line = ff_yadif_filter_line_sse2;
|
yadif->filter_line = ff_yadif_filter_line_sse2;
|
||||||
yadif->req_align = 16;
|
if (EXTERNAL_SSSE3(cpu_flags))
|
||||||
}
|
|
||||||
if (EXTERNAL_SSSE3(cpu_flags)) {
|
|
||||||
yadif->filter_line = ff_yadif_filter_line_ssse3;
|
yadif->filter_line = ff_yadif_filter_line_ssse3;
|
||||||
yadif->req_align = 16;
|
|
||||||
}
|
|
||||||
#endif /* HAVE_YASM */
|
#endif /* HAVE_YASM */
|
||||||
}
|
}
|
||||||
|
@ -55,13 +55,11 @@ typedef struct YADIFContext {
|
|||||||
/**
|
/**
|
||||||
* Required alignment for filter_line
|
* Required alignment for filter_line
|
||||||
*/
|
*/
|
||||||
int req_align;
|
|
||||||
void (*filter_line)(void *dst,
|
void (*filter_line)(void *dst,
|
||||||
void *prev, void *cur, void *next,
|
void *prev, void *cur, void *next,
|
||||||
int w, int prefs, int mrefs, int parity, int mode);
|
int w, int prefs, int mrefs, int parity, int mode);
|
||||||
void (*filter_edges)(void *dst, void *prev, void *cur, void *next,
|
void (*filter_edges)(void *dst, void *prev, void *cur, void *next,
|
||||||
int w, int prefs, int mrefs, int parity, int mode,
|
int w, int prefs, int mrefs, int parity, int mode);
|
||||||
int l_edge);
|
|
||||||
|
|
||||||
const AVPixFmtDescriptor *csp;
|
const AVPixFmtDescriptor *csp;
|
||||||
int eof;
|
int eof;
|
||||||
|
Loading…
Reference in New Issue
Block a user