Merge remote-tracking branch 'qatar/master'

* qatar/master:
  png: add missing #if HAVE_SSSE3 around function pointer assignment.
  imdct36: mark SSE functions as using all 16 XMM registers.
  png: move DSP functions to their own DSP context.
  sunrast: Add a sample request for TIFF, IFF, and Experimental Rastfile formats.
  sunrast: Cosmetics
  sunrast: Remove if (unsigned int < 0) check.
  sunrast: Replace magic number by a macro.

Conflicts:
    libavcodec/dsputil.c
    libavcodec/dsputil.h
    libavcodec/pngdec.c
    libavcodec/sunrast.c
    libavcodec/x86/Makefile
    libavcodec/x86/dsputil_mmx.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
2025-04-01 23:52:07 +00:00 · 2012-01-30 05:20:58 +01:00 · 2012-01-30 05:20:58 +01:00 · e1492151fb
commit e1492151fb
parent 90bf7c7b41 20a7d3178f
10 changed files with 192 additions and 136 deletions
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@ -322,7 +322,7 @@ OBJS-$(CONFIG_PGMYUV_DECODER)          += pnmdec.o pnm.o
 OBJS-$(CONFIG_PGMYUV_ENCODER)          += pnmenc.o pnm.o
 OBJS-$(CONFIG_PGSSUB_DECODER)          += pgssubdec.o
 OBJS-$(CONFIG_PICTOR_DECODER)          += pictordec.o cga_data.o
-OBJS-$(CONFIG_PNG_DECODER)             += png.o pngdec.o
+OBJS-$(CONFIG_PNG_DECODER)             += png.o pngdec.o pngdsp.o
 OBJS-$(CONFIG_PNG_ENCODER)             += png.o pngenc.o
 OBJS-$(CONFIG_PPM_DECODER)             += pnmdec.o pnm.o
 OBJS-$(CONFIG_PPM_ENCODER)             += pnmenc.o pnm.o
--- a/libavcodec/png.h
+++ b/libavcodec/png.h
@ -23,9 +23,6 @@
 #define AVCODEC_PNG_H

 #include <stdint.h>
-#include <zlib.h>
-
-#include "avcodec.h"

 #define PNG_COLOR_MASK_PALETTE    1
 #define PNG_COLOR_MASK_COLOR      2
@ -72,41 +69,4 @@ int ff_png_pass_row_size(int pass, int bits_per_pixel, int width);

 void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);

-typedef struct PNGDecContext {
-    const uint8_t *bytestream;
-    const uint8_t *bytestream_start;
-    const uint8_t *bytestream_end;
-    AVFrame picture1, picture2;
-    AVFrame *current_picture, *last_picture;
-
-    int state;
-    int width, height;
-    int bit_depth;
-    int color_type;
-    int compression_type;
-    int interlace_type;
-    int filter_type;
-    int channels;
-    int bits_per_pixel;
-    int bpp;
-
-    uint8_t *image_buf;
-    int image_linesize;
-    uint32_t palette[256];
-    uint8_t *crow_buf;
-    uint8_t *last_row;
-    uint8_t *tmp_row;
-    int pass;
-    int crow_size; /* compressed row size (include filter type) */
-    int row_size; /* decompressed row size */
-    int pass_row_size; /* decompress row size of the current pass */
-    int y;
-    z_stream zstream;
-
-    void (*add_bytes_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w);
-    void (*add_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
-} PNGDecContext;
-
-void ff_png_init_mmx(PNGDecContext *s);
-
 #endif /* AVCODEC_PNG_H */
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@ -25,6 +25,7 @@
 #include "avcodec.h"
 #include "bytestream.h"
 #include "png.h"
+#include "pngdsp.h"

 /* TODO:
 * - add 16 bit depth support
@ -32,6 +33,42 @@

 #include <zlib.h>

+//#define DEBUG
+
+/**
+ * Decoder state for the PNG decoder in this file.
+ *
+ * The definition is local to pngdec.c; the row-filter routines are reached
+ * through the embedded PNGDSPContext, which png_dec_init() fills in by
+ * calling ff_pngdsp_init().
+ */
+typedef struct PNGDecContext {
+    PNGDSPContext dsp; /* row-filter function pointers (add_bytes_l2, add_paeth_prediction) */
+
+    const uint8_t *bytestream;
+    const uint8_t *bytestream_start;
+    const uint8_t *bytestream_end;
+    AVFrame picture1, picture2; /* both reset with avcodec_get_frame_defaults() in png_dec_init() */
+    AVFrame *current_picture, *last_picture;
+
+    int state;
+    int width, height;
+    int bit_depth;
+    int color_type;
+    int compression_type;
+    int interlace_type;
+    int filter_type;
+    int channels;
+    int bits_per_pixel;
+    int bpp; /* NOTE(review): presumably bytes per pixel, as opposed to bits_per_pixel - confirm */
+
+    uint8_t *image_buf;
+    int image_linesize;
+    uint32_t palette[256];
+    uint8_t *crow_buf;
+    uint8_t *last_row;
+    uint8_t *tmp_row;
+    int pass;
+    int crow_size; /* compressed row size (include filter type) */
+    int row_size; /* decompressed row size */
+    int pass_row_size; /* decompress row size of the current pass */
+    int y;
+    z_stream zstream; /* zlib stream state (type comes from <zlib.h>) */
+} PNGDecContext;
+
 /* Mask to determine which y pixels can be written in a pass */
 static const uint8_t png_pass_dsp_ymask[NB_PASSES] = {
    0xff, 0xff, 0x0f, 0xff, 0x33, 0xff, 0x55,
@ -114,23 +151,7 @@ static void png_put_interlaced_row(uint8_t *dst, int width,
    }
 }

-// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
-#define pb_7f (~0UL/255 * 0x7f)
-#define pb_80 (~0UL/255 * 0x80)
-
-static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
-{
-    long i;
-    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
-        long a = *(long*)(src1+i);
-        long b = *(long*)(src2+i);
-        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
-    }
-    for(; i<w; i++)
-        dst[i] = src1[i]+src2[i];
-}
-
-static void add_paeth_prediction_c(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)
+void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)
 {
    int i;
    for(i = 0; i < w; i++) {
@ -187,7 +208,7 @@ static void add_paeth_prediction_c(uint8_t *dst, uint8_t *src, uint8_t *top, int
    }

 /* NOTE: 'dst' can be equal to 'last' */
-static void png_filter_row(PNGDecContext *s, uint8_t *dst, int filter_type,
+static void png_filter_row(PNGDSPContext *dsp, uint8_t *dst, int filter_type,
                           uint8_t *src, uint8_t *last, int size, int bpp)
 {
    int i, p, r, g, b, a;
@ -213,7 +234,7 @@ static void png_filter_row(PNGDecContext *s, uint8_t *dst, int filter_type,
        }
        break;
    case PNG_FILTER_VALUE_UP:
-        s->add_bytes_l2(dst, src, last, size);
+        dsp->add_bytes_l2(dst, src, last, size);
        break;
    case PNG_FILTER_VALUE_AVG:
        for(i = 0; i < bpp; i++) {
@ -231,10 +252,10 @@ static void png_filter_row(PNGDecContext *s, uint8_t *dst, int filter_type,
        if(bpp > 2 && size > 4) {
            // would write off the end of the array if we let it process the last pixel with bpp=3
            int w = bpp==4 ? size : size-3;
-            s->add_paeth_prediction(dst+i, src+i, last+i, w-i, bpp);
+            dsp->add_paeth_prediction(dst+i, src+i, last+i, w-i, bpp);
            i = w;
        }
-        add_paeth_prediction_c(dst+i, src+i, last+i, size-i, bpp);
+        ff_add_png_paeth_prediction(dst+i, src+i, last+i, size-i, bpp);
        break;
    }
 }
@ -704,14 +725,7 @@ static av_cold int png_dec_init(AVCodecContext *avctx)
    avcodec_get_frame_defaults(&s->picture1);
    avcodec_get_frame_defaults(&s->picture2);

-#if HAVE_MMX
-    ff_png_init_mmx(s);
-#endif
-
-    if (!s->add_paeth_prediction)
-        s->add_paeth_prediction = add_paeth_prediction_c;
-    if (!s->add_bytes_l2)
-        s->add_bytes_l2 = add_bytes_l2_c;
+    ff_pngdsp_init(&s->dsp);

    return 0;
 }
--- a/libavcodec/pngdsp.c
+++ b/libavcodec/pngdsp.c
@ -0,0 +1,48 @@
+/*
+ * PNG image format
+ * Copyright (c) 2008 Loren Merritt <lorenm@u.washington.edu>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "png.h"
+#include "pngdsp.h"
+
+// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
+#define pb_7f (~0UL/255 * 0x7f)
+#define pb_80 (~0UL/255 * 0x80)
+
+/**
+ * Byte-wise addition of two rows: dst[i] = src1[i] + src2[i] (modulo 256)
+ * for every i in [0, w).
+ *
+ * As many bytes as possible are handled one long at a time: the low 7 bits
+ * of every byte are added without carrying into the neighbouring byte, and
+ * the top bit of each byte is then patched in with an XOR. The remaining
+ * w % sizeof(long) bytes are added individually.
+ *
+ * @param dst   destination row, at least w bytes
+ * @param src1  first source row, at least w bytes
+ * @param src2  second source row, at least w bytes
+ * @param w     number of bytes to process; values <= 0 do nothing
+ */
+static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
+{
+    long i;
+    /* sizeof() yields an unsigned type: without the cast, w < sizeof(long)
+     * makes "w - sizeof(long)" wrap to a huge value and the word loop runs
+     * far past the end of the buffers. With the cast the bound is negative
+     * and the loop is skipped. */
+    for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) {
+        long a = *(long *)(src1 + i);
+        long b = *(long *)(src2 + i);
+        *(long *)(dst + i) = ((a & pb_7f) + (b & pb_7f)) ^ ((a ^ b) & pb_80);
+    }
+    for (; i < w; i++)
+        dst[i] = src1[i] + src2[i];
+}
+
+/**
+ * Populate a PNGDSPContext.
+ *
+ * Both members are first pointed at the portable C routines; when HAVE_MMX
+ * is non-zero, ff_pngdsp_init_x86() is then called and may replace them.
+ *
+ * @param dsp  context to fill in
+ */
+void ff_pngdsp_init(PNGDSPContext *dsp)
+{
+    /* portable defaults */
+    dsp->add_paeth_prediction = ff_add_png_paeth_prediction;
+    dsp->add_bytes_l2         = add_bytes_l2_c;
+
+    /* architecture-specific overrides come last so they take precedence */
+    if (HAVE_MMX) {
+        ff_pngdsp_init_x86(dsp);
+    }
+}
--- a/libavcodec/pngdsp.h
+++ b/libavcodec/pngdsp.h
@ -0,0 +1,40 @@
+/*
+ * PNG image format
+ * Copyright (c) 2008 Loren Merritt <lorenm@u.washington.edu>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PNGDSP_H
+#define AVCODEC_PNGDSP_H
+
+#include <stdint.h>
+
+/**
+ * Function table for the PNG row-filter primitives.
+ * ff_pngdsp_init() assigns every member.
+ */
+typedef struct PNGDSPContext {
+    /* byte-wise dst[i] = src1[i] + src2[i] for i in [0, w) */
+    void (*add_bytes_l2)(uint8_t *dst  /* align 16 */,
+                         uint8_t *src1 /* align 16 */,
+                         uint8_t *src2 /* align 16 */, int w);
+
+    /* Paeth prediction row filter; the C implementation is
+     * ff_add_png_paeth_prediction().
+     * this might write to dst[w] */
+    void (*add_paeth_prediction)(uint8_t *dst, uint8_t *src,
+                                 uint8_t *top, int w, int bpp);
+} PNGDSPContext;
+
+void ff_pngdsp_init(PNGDSPContext *dsp);
+void ff_pngdsp_init_x86(PNGDSPContext *dsp);
+
+#endif /* AVCODEC_PNGDSP_H */
--- a/libavcodec/sunrast.c
+++ b/libavcodec/sunrast.c
@ -23,6 +23,8 @@
 #include "libavutil/imgutils.h"
 #include "avcodec.h"

+#define RAS_MAGIC 0x59a66a95
+
 /* The Old and Standard format types indicate that the image data is
 * uncompressed. There is no difference between the two formats. */
 #define RT_OLD          0
@ -55,18 +57,18 @@ static av_cold int sunrast_init(AVCodecContext *avctx) {
    SUNRASTContext *s = avctx->priv_data;

    avcodec_get_frame_defaults(&s->picture);
-    avctx->coded_frame= &s->picture;
+    avctx->coded_frame = &s->picture;

    return 0;
 }

 static int sunrast_decode_frame(AVCodecContext *avctx, void *data,
                                int *data_size, AVPacket *avpkt) {
-    const uint8_t *buf = avpkt->data;
-    const uint8_t *buf_end = avpkt->data + avpkt->size;
+    const uint8_t *buf       = avpkt->data;
+    const uint8_t *buf_end   = avpkt->data + avpkt->size;
    SUNRASTContext * const s = avctx->priv_data;
-    AVFrame *picture = data;
-    AVFrame * const p = &s->picture;
+    AVFrame *picture         = data;
+    AVFrame * const p        = &s->picture;
    unsigned int w, h, depth, type, maptype, maplength, stride, x, y, len, alen;
    uint8_t *ptr, *ptr2 = NULL;
    const uint8_t *bufstart = buf;
@ -74,22 +76,22 @@ static int sunrast_decode_frame(AVCodecContext *avctx, void *data,
    if (avpkt->size < 32)
        return AVERROR_INVALIDDATA;

-    if (AV_RB32(buf) != 0x59a66a95) {
+    if (AV_RB32(buf) != RAS_MAGIC) {
        av_log(avctx, AV_LOG_ERROR, "this is not sunras encoded data\n");
        return -1;
    }

-    w         = AV_RB32(buf+4);
-    h         = AV_RB32(buf+8);
-    depth     = AV_RB32(buf+12);
-    type      = AV_RB32(buf+20);
-    maptype   = AV_RB32(buf+24);
-    maplength = AV_RB32(buf+28);
+    w         = AV_RB32(buf + 4);
+    h         = AV_RB32(buf + 8);
+    depth     = AV_RB32(buf + 12);
+    type      = AV_RB32(buf + 20);
+    maptype   = AV_RB32(buf + 24);
+    maplength = AV_RB32(buf + 28);
    buf      += 32;

    if (type == RT_EXPERIMENTAL) {
-        av_log(avctx, AV_LOG_ERROR, "unsupported (compression) type\n");
-        return -1;
+        av_log_ask_for_sample(avctx, "unsupported (compression) type\n");
+        return AVERROR_PATCHWELCOME;
    }
    if (type > RT_FORMAT_IFF) {
        av_log(avctx, AV_LOG_ERROR, "invalid (compression) type\n");
@ -161,7 +163,7 @@ static int sunrast_decode_frame(AVCodecContext *avctx, void *data,
        }

        ptr = p->data[1];
-        for (x=0; x<len; x++, ptr+=4)
+        for (x = 0; x < len; x++, ptr += 4)
            *(uint32_t *)ptr = (0xFF<<24) + (buf[x]<<16) + (buf[len+x]<<8) + buf[len+len+x];
    }

@ -179,11 +181,11 @@ static int sunrast_decode_frame(AVCodecContext *avctx, void *data,

    /* scanlines are aligned on 16 bit boundaries */
    len  = (depth * w + 7) >> 3;
-    alen = len + (len&1);
+    alen = len + (len & 1);

    if (type == RT_BYTE_ENCODED) {
        int value, run;
-        uint8_t *end = ptr + h*stride;
+        uint8_t *end = ptr + h * stride;

        x = 0;
        while (ptr != end && buf < buf_end) {
@ -208,7 +210,7 @@ static int sunrast_decode_frame(AVCodecContext *avctx, void *data,
            }
        }
    } else {
-        for (y=0; y<h; y++) {
+        for (y = 0; y < h; y++) {
            if (buf_end - buf < len)
                break;
            memcpy(ptr, buf, len);
@ -241,7 +243,7 @@ static int sunrast_decode_frame(AVCodecContext *avctx, void *data,
        av_freep(&ptr_free);
    }

-    *picture = s->picture;
+    *picture   = s->picture;
    *data_size = sizeof(AVFrame);

    return buf - bufstart;
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@ -42,13 +42,13 @@ MMX-OBJS-$(CONFIG_CAVS_DECODER)        += x86/cavsdsp_mmx.o
 MMX-OBJS-$(CONFIG_DNXHD_ENCODER)       += x86/dnxhd_mmx.o
 MMX-OBJS-$(CONFIG_MPEGAUDIODSP)        += x86/mpegaudiodec_mmx.o
 YASM-OBJS-$(CONFIG_MPEGAUDIODSP)       += x86/imdct36_sse.o
-MMX-OBJS-$(CONFIG_PNG_DECODER)         += x86/png_mmx.o
 MMX-OBJS-$(CONFIG_ENCODERS)            += x86/dsputilenc_mmx.o
 YASM-OBJS-$(CONFIG_ENCODERS)           += x86/dsputilenc_yasm.o
 MMX-OBJS-$(CONFIG_GPL)                 += x86/idct_mmx.o
 MMX-OBJS-$(CONFIG_LPC)                 += x86/lpc_mmx.o
 YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER)     += x86/proresdsp.o
 MMX-OBJS-$(CONFIG_PRORES_LGPL_DECODER)      += x86/proresdsp-init.o
+MMX-OBJS-$(CONFIG_PNG_DECODER)         += x86/pngdsp-init.o
 YASM-OBJS-$(CONFIG_PRORES_DECODER)     += x86/proresdsp.o
 MMX-OBJS-$(CONFIG_PRORES_DECODER)      += x86/proresdsp-init.o
 MMX-OBJS-$(CONFIG_DWT)                 += x86/snowdsp_mmx.o \
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@ -2671,7 +2671,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
            if (HAVE_AMD3DNOW && (mm_flags & AV_CPU_FLAG_3DNOW))
                c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
 #endif
-
        } else if (HAVE_AMD3DNOW && (mm_flags & AV_CPU_FLAG_3DNOW)) {
            c->prefetch = prefetch_3dnow;

--- a/libavcodec/x86/imdct36_sse.asm
+++ b/libavcodec/x86/imdct36_sse.asm
@ -393,7 +393,7 @@ INIT_XMM sse
 %endif

 %macro DEFINE_FOUR_IMDCT 0
-cglobal four_imdct36_float, 5,5,8, out, buf, in, win, tmp
+cglobal four_imdct36_float, 5,5,16, out, buf, in, win, tmp
    movlps  m0, [inq+64]
    movhps  m0, [inq+64 +   72]
    movlps  m3, [inq+64 + 2*72]
--- a/libavcodec/x86/pngdsp-init.c
+++ b/libavcodec/x86/pngdsp-init.c
@ -1,6 +1,6 @@
 /*
- * MMX optimized PNG utils
- * Copyright (c) 2008 Loren Merritt
+ * x86 PNG optimizations.
+ * Copyright (c) 2008 Loren Merritt <lorenm@u.washington.edu>
 *
 * This file is part of FFmpeg.
 *
@ -17,46 +17,18 @@
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
 */

 #include "libavutil/cpu.h"
 #include "libavutil/x86_cpu.h"
-#include "libavcodec/dsputil.h"
-#include "libavcodec/png.h"
+#include "libavcodec/pngdsp.h"
 #include "dsputil_mmx.h"

-//#undef NDEBUG
-//#include <assert.h>
-
-static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
-{
-    x86_reg i=0;
-    __asm__ volatile(
-        "jmp 2f                         \n\t"
-        "1:                             \n\t"
-        "movq   (%2, %0), %%mm0         \n\t"
-        "movq  8(%2, %0), %%mm1         \n\t"
-        "paddb  (%3, %0), %%mm0         \n\t"
-        "paddb 8(%3, %0), %%mm1         \n\t"
-        "movq %%mm0,  (%1, %0)          \n\t"
-        "movq %%mm1, 8(%1, %0)          \n\t"
-        "add $16, %0                    \n\t"
-        "2:                             \n\t"
-        "cmp %4, %0                     \n\t"
-        " js 1b                         \n\t"
-        : "+r" (i)
-        : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15)
-    );
-    for(; i<w; i++)
-        dst[i] = src1[i] + src2[i];
-}
-
 #define PAETH(cpu, abs3)\
-static void add_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
+static void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
 {\
    x86_reg i, end;\
-    if(bpp>4) add_paeth_prediction_##cpu(dst+bpp/2, src+bpp/2, top+bpp/2, w-bpp/2, -bpp);\
+    if(bpp>4) add_png_paeth_prediction_##cpu(dst+bpp/2, src+bpp/2, top+bpp/2, w-bpp/2, -bpp);\
    if(bpp<0) bpp=-bpp;\
    i= -bpp;\
    end = w-3;\
@ -128,16 +100,37 @@ PAETH(mmx2, ABS3_MMX2)
 PAETH(ssse3, ABS3_SSSE3)
 #endif

-void ff_png_init_mmx(PNGDecContext *s)
+/**
+ * MMX version of the byte-wise row addition dst[i] = src1[i] + src2[i]
+ * for i in [0, w).
+ *
+ * The inline assembly handles 16 bytes per iteration (two 8-byte movq
+ * loads, two paddb, two movq stores) and keeps looping while i < w - 15;
+ * whatever is left is added one byte at a time in C.
+ *
+ * NOTE(review): the asm statement lists no clobbers (neither "memory" nor
+ * the mm0/mm1 registers) although it stores through dst - confirm this is
+ * intended.
+ */
+static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
 {
-    int mm_flags = av_get_cpu_flags();
-
-    if (mm_flags & AV_CPU_FLAG_MMX2) {
-        s->add_bytes_l2 = add_bytes_l2_mmx;
-        s->add_paeth_prediction = add_paeth_prediction_mmx2;
-#if HAVE_SSSE3
-        if (mm_flags & AV_CPU_FLAG_SSSE3)
-            s->add_paeth_prediction = add_paeth_prediction_ssse3;
-#endif
-    }
+    x86_reg i=0;
+    /* %0 = i (byte offset), %1 = dst, %2 = src1, %3 = src2, %4 = w - 15 */
+    __asm__ volatile(
+        "jmp 2f                         \n\t"
+        "1:                             \n\t"
+        "movq   (%2, %0), %%mm0         \n\t"
+        "movq  8(%2, %0), %%mm1         \n\t"
+        "paddb  (%3, %0), %%mm0         \n\t"
+        "paddb 8(%3, %0), %%mm1         \n\t"
+        "movq %%mm0,  (%1, %0)          \n\t"
+        "movq %%mm1, 8(%1, %0)          \n\t"
+        "add $16, %0                    \n\t"
+        "2:                             \n\t"
+        "cmp %4, %0                     \n\t"
+        " js 1b                         \n\t"
+        : "+r" (i)
+        : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg) w - 15)
+    );
+    /* scalar tail for the bytes the 16-byte loop did not cover */
+    for (; i < w; i++)
+        dst[i] = src1[i] + src2[i];
+}
+
+/**
+ * Install the x86 implementations that the running CPU supports.
+ *
+ * The CPU flags come from av_get_cpu_flags(): MMX selects
+ * add_bytes_l2_mmx, MMX2 selects the MMX2 Paeth routine, and SSSE3
+ * (checked last, so it wins) selects the SSSE3 Paeth routine.
+ *
+ * The SSSE3 assignment is wrapped in #if HAVE_SSSE3 because
+ * add_png_paeth_prediction_ssse3 is only instantiated inside a
+ * preprocessor conditional; a plain "if (HAVE_SSSE3 && ...)" would still
+ * need the identifier to be declared when HAVE_SSSE3 is 0 and the file
+ * would not compile.
+ *
+ * @param dsp  context whose members may be overridden
+ */
+void ff_pngdsp_init_x86(PNGDSPContext *dsp)
+{
+    int flags = av_get_cpu_flags();
+
+    if (flags & AV_CPU_FLAG_MMX)
+        dsp->add_bytes_l2         = add_bytes_l2_mmx;
+    if (flags & AV_CPU_FLAG_MMX2)
+        dsp->add_paeth_prediction = add_png_paeth_prediction_mmx2;
+#if HAVE_SSSE3
+    if (flags & AV_CPU_FLAG_SSSE3)
+        dsp->add_paeth_prediction = add_png_paeth_prediction_ssse3;
+#endif
 }