From ed16c2dbf47cdd7c48825b4da6e7036698e5dde1 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Wed, 3 Apr 2013 20:09:58 +0200
Subject: [PATCH] h261: Remove H.261 loop filter from dsputil

There is no arch-optimized version of the H.261 loop filter and there
likely will never be, so the dsputil overhead does not give any benefit.
---
 libavcodec/dsputil.c | 29 -----------------------------
 libavcodec/dsputil.h |  2 --
 libavcodec/h261.c    | 40 ++++++++++++++++++++++++++++++++++------
 3 files changed, 34 insertions(+), 37 deletions(-)

diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 10d775ca5e..7b7cdc88d4 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -1479,33 +1479,6 @@ static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
     }
 }
 
-static void h261_loop_filter_c(uint8_t *src, int stride){
-    int x,y,xy,yz;
-    int temp[64];
-
-    for(x=0; x<8; x++){
-        temp[x      ] = 4*src[x           ];
-        temp[x + 7*8] = 4*src[x + 7*stride];
-    }
-    for(y=1; y<7; y++){
-        for(x=0; x<8; x++){
-            xy = y * stride + x;
-            yz = y * 8 + x;
-            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
-        }
-    }
-
-    for(y=0; y<8; y++){
-        src[  y*stride] = (temp[  y*8] + 2)>>2;
-        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
-        for(x=1; x<7; x++){
-            xy = y * stride + x;
-            yz = y * 8 + x;
-            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
-        }
-    }
-}
-
 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
 {
     int s, i;
@@ -2735,8 +2708,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
         c->h263_v_loop_filter= h263_v_loop_filter_c;
     }
 
-    c->h261_loop_filter= h261_loop_filter_c;
-
     c->try_8x8basis= try_8x8basis_c;
     c->add_8x8basis= add_8x8basis_c;
 
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index ce4469dc5c..ca418fe0b0 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -277,8 +277,6 @@ typedef struct DSPContext {
     void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
     void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
 
-    void (*h261_loop_filter)(uint8_t *src, int stride);
-
     /* assume len is a multiple of 8, and arrays are 16-byte aligned */
     void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
 
diff --git a/libavcodec/h261.c b/libavcodec/h261.c
index c3abdb23a7..1ffd64a5ec 100644
--- a/libavcodec/h261.c
+++ b/libavcodec/h261.c
@@ -32,6 +32,34 @@
 
 uint8_t ff_h261_rl_table_store[2][2*MAX_RUN + MAX_LEVEL + 3];
 
+static void h261_loop_filter(uint8_t *src, int stride)
+{
+    int x,y,xy,yz;
+    int temp[64];
+
+    for(x=0; x<8; x++){
+        temp[x      ] = 4*src[x           ];
+        temp[x + 7*8] = 4*src[x + 7*stride];
+    }
+    for(y=1; y<7; y++){
+        for(x=0; x<8; x++){
+            xy = y * stride + x;
+            yz = y * 8 + x;
+            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
+        }
+    }
+
+    for(y=0; y<8; y++){
+        src[  y*stride] = (temp[  y*8] + 2)>>2;
+        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
+        for(x=1; x<7; x++){
+            xy = y * stride + x;
+            yz = y * 8 + x;
+            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
+        }
+    }
+}
+
 void ff_h261_loop_filter(MpegEncContext *s){
     H261Context * h= (H261Context*)s;
     const int linesize  = s->linesize;
@@ -43,10 +71,10 @@ void ff_h261_loop_filter(MpegEncContext *s){
     if(!(IS_FIL (h->mtype)))
         return;
 
-    s->dsp.h261_loop_filter(dest_y                   , linesize);
-    s->dsp.h261_loop_filter(dest_y                + 8, linesize);
-    s->dsp.h261_loop_filter(dest_y + 8 * linesize    , linesize);
-    s->dsp.h261_loop_filter(dest_y + 8 * linesize + 8, linesize);
-    s->dsp.h261_loop_filter(dest_cb, uvlinesize);
-    s->dsp.h261_loop_filter(dest_cr, uvlinesize);
+    h261_loop_filter(dest_y,                    linesize);
+    h261_loop_filter(dest_y                + 8, linesize);
+    h261_loop_filter(dest_y + 8 * linesize,     linesize);
+    h261_loop_filter(dest_y + 8 * linesize + 8, linesize);
+    h261_loop_filter(dest_cb, uvlinesize);
+    h261_loop_filter(dest_cr, uvlinesize);
 }