From 064146480b22d541c1c2e0c3e64c95c4370b777e Mon Sep 17 00:00:00 2001
From: Loren Merritt <lorenm@u.washington.edu>
Date: Sun, 24 Mar 2013 16:04:48 +0100
Subject: [PATCH] tools: add tiny_ssim

Taken from:
http://lists.mplayerhq.hu/pipermail/mencoder-users/2006-August/003801.html
and
x264
See: [FFmpeg-devel] [PATCH] tools: add tiny_ssim

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 .gitignore        |   1 +
 Makefile          |   2 +-
 tests/Makefile    |   2 +-
 tests/tiny_ssim.c | 211 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 214 insertions(+), 2 deletions(-)
 create mode 100644 tests/tiny_ssim.c

diff --git a/.gitignore b/.gitignore
index 9b935c7c4d..fd4ce20d30 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,6 +52,7 @@
 /tests/data/
 /tests/rotozoom
 /tests/tiny_psnr
+/tests/tiny_ssim
 /tests/videogen
 /tests/vsynth1/
 /tools/aviocat
diff --git a/Makefile b/Makefile
index 07821d1146..d12a9a9d1b 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ INSTPROGS   = $(PROGS-yes:%=%$(PROGSSUF)$(EXESUF))
 
 OBJS        = cmdutils.o $(EXEOBJS)
 OBJS-ffmpeg = ffmpeg_opt.o ffmpeg_filter.o
-TESTTOOLS   = audiogen videogen rotozoom tiny_psnr base64
+TESTTOOLS   = audiogen videogen rotozoom tiny_psnr tiny_ssim base64
 HOSTPROGS  := $(TESTTOOLS:%=tests/%) doc/print_options
 TOOLS       = qt-faststart trasher
 TOOLS-$(CONFIG_ZLIB) += cws2fws
diff --git a/tests/Makefile b/tests/Makefile
index 73ee2d9c5d..0f911e1b66 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -147,7 +147,7 @@ $(FATE_EXTERN):
 	@echo "$@ requires external samples and SAMPLES not specified"; false
 endif
 
-FATE_UTILS = base64 tiny_psnr
+FATE_UTILS = base64 tiny_psnr tiny_ssim
 
 TOOL = ffmpeg
 
diff --git a/tests/tiny_ssim.c b/tests/tiny_ssim.c
new file mode 100644
index 0000000000..05eb98d54f
--- /dev/null
+++ b/tests/tiny_ssim.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2003-2013 Loren Merritt
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110 USA
+ */
+/*
+ * tiny_ssim.c
+ * Computes the Structural Similarity Metric between two rawYV12 video files.
+ * original algorithm:
+ * Z. Wang, A. C. Bovik, H. R. Sheikh and E. P. Simoncelli,
+ *   "Image quality assessment: From error visibility to structural similarity,"
+ *   IEEE Transactions on Image Processing, vol. 13, no. 4, pp. 600-612, Apr. 2004.
+ *
+ * To improve speed, this implementation uses the standard approximation of
+ * overlapped 8x8 block sums, rather than the original gaussian weights.
+ */
+
+#include <inttypes.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "libavutil/avutil.h"
+
+
+#define BIT_DEPTH 8
+#define PIXEL_MAX ((1 << BIT_DEPTH)-1)
+typedef uint8_t  pixel;
+
+/****************************************************************************
+ * structural similarity metric
+ ****************************************************************************/
+static void ssim_4x4x2_core( const pixel *pix1, intptr_t stride1,
+                             const pixel *pix2, intptr_t stride2,
+                             int sums[2][4] )
+{
+    for( int z = 0; z < 2; z++ )
+    {
+        uint32_t s1 = 0, s2 = 0, ss = 0, s12 = 0;
+        for( int y = 0; y < 4; y++ )
+            for( int x = 0; x < 4; x++ )
+            {
+                int a = pix1[x+y*stride1];
+                int b = pix2[x+y*stride2];
+                s1  += a;
+                s2  += b;
+                ss  += a*a;
+                ss  += b*b;
+                s12 += a*b;
+            }
+        sums[z][0] = s1;
+        sums[z][1] = s2;
+        sums[z][2] = ss;
+        sums[z][3] = s12;
+        pix1 += 4;
+        pix2 += 4;
+    }
+}
+
+static float ssim_end1( int s1, int s2, int ss, int s12 )
+{
+/* Maximum value for 10-bit is: ss*64 = (2^10-1)^2*16*4*64 = 4286582784, which will overflow in some cases.
+ * s1*s1, s2*s2, and s1*s2 also obtain this value for edge cases: ((2^10-1)*16*4)^2 = 4286582784.
+ * Maximum value for 9-bit is: ss*64 = (2^9-1)^2*16*4*64 = 1069551616, which will not overflow. */
+#if BIT_DEPTH > 9
+#define type float
+    static const float ssim_c1 = .01*.01*PIXEL_MAX*PIXEL_MAX*64;
+    static const float ssim_c2 = .03*.03*PIXEL_MAX*PIXEL_MAX*64*63;
+#else
+#define type int
+    static const int ssim_c1 = (int)(.01*.01*PIXEL_MAX*PIXEL_MAX*64 + .5);
+    static const int ssim_c2 = (int)(.03*.03*PIXEL_MAX*PIXEL_MAX*64*63 + .5);
+#endif
+    type fs1 = s1;
+    type fs2 = s2;
+    type fss = ss;
+    type fs12 = s12;
+    type vars = fss*64 - fs1*fs1 - fs2*fs2;
+    type covar = fs12*64 - fs1*fs2;
+    return (float)(2*fs1*fs2 + ssim_c1) * (float)(2*covar + ssim_c2)
+         / ((float)(fs1*fs1 + fs2*fs2 + ssim_c1) * (float)(vars + ssim_c2));
+#undef type
+}
+
+static float ssim_end4( int sum0[5][4], int sum1[5][4], int width )
+{
+    float ssim = 0.0;
+    for( int i = 0; i < width; i++ )
+        ssim += ssim_end1( sum0[i][0] + sum0[i+1][0] + sum1[i][0] + sum1[i+1][0],
+                           sum0[i][1] + sum0[i+1][1] + sum1[i][1] + sum1[i+1][1],
+                           sum0[i][2] + sum0[i+1][2] + sum1[i][2] + sum1[i+1][2],
+                           sum0[i][3] + sum0[i+1][3] + sum1[i][3] + sum1[i+1][3] );
+    return ssim;
+}
+
+float ssim_plane(
+                           pixel *pix1, intptr_t stride1,
+                           pixel *pix2, intptr_t stride2,
+                           int width, int height, void *buf, int *cnt )
+{
+    int z = 0;
+    float ssim = 0.0;
+    int (*sum0)[4] = buf;
+    int (*sum1)[4] = sum0 + (width >> 2) + 3;
+    width >>= 2;
+    height >>= 2;
+    for( int y = 1; y < height; y++ )
+    {
+        for( ; z <= y; z++ )
+        {
+            FFSWAP( void*, sum0, sum1 );
+            for( int x = 0; x < width; x+=2 )
+                ssim_4x4x2_core( &pix1[4*(x+z*stride1)], stride1, &pix2[4*(x+z*stride2)], stride2, &sum0[x] );
+        }
+        for( int x = 0; x < width-1; x += 4 )
+            ssim += ssim_end4( sum0+x, sum1+x, FFMIN(4,width-x-1) );
+    }
+//     *cnt = (height-1) * (width-1);
+    return ssim / ((height-1) * (width-1));
+}
+
+
+uint64_t ssd_plane( const uint8_t *pix1, const uint8_t *pix2, int size )
+{
+    uint64_t ssd = 0;
+    int i;
+    for( i=0; i<size; i++ )
+    {
+        int d = pix1[i] - pix2[i];
+        ssd += d*d;
+    }
+    return ssd;
+}
+
+double ssd_to_psnr( uint64_t ssd, uint64_t denom )
+{
+    return -10*log((double)ssd/(denom*255*255))/log(10);
+}
+
+int main(int argc, char* argv[])
+{
+    FILE *f[2];
+    uint8_t *buf[2], *plane[2][3];
+    int *temp;
+    uint64_t ssd[3] = {0,0,0};
+    double ssim[3] = {0,0,0};
+    int frame_size, w, h;
+    int frames, seek;
+    int i;
+
+    if( argc<4 || 2 != sscanf(argv[3], "%dx%d", &w, &h) )
+    {
+        printf("tiny_ssim <file1.yuv> <file2.yuv> <width>x<height> [<seek>]\n");
+        return -1;
+    }
+
+    f[0] = fopen(argv[1], "rb");
+    f[1] = fopen(argv[2], "rb");
+    sscanf(argv[3], "%dx%d", &w, &h);
+    frame_size = w*h*3/2;
+    for( i=0; i<2; i++ )
+    {
+        buf[i] = malloc(frame_size);
+        plane[i][0] = buf[i];
+        plane[i][1] = plane[i][0] + w*h;
+        plane[i][2] = plane[i][1] + w*h/4;
+    }
+    temp = malloc((2*w+12)*sizeof(*temp));
+    seek = argc<5 ? 0 : atoi(argv[4]);
+    fseek(f[seek<0], seek < 0 ? -seek : seek, SEEK_SET);
+
+    for( frames=0;; frames++ )
+    {
+        if( fread(buf[0], frame_size, 1, f[0]) != 1) break;
+        if( fread(buf[1], frame_size, 1, f[1]) != 1) break;
+        for( i=0; i<3; i++ )
+        {
+            ssd[i]  += ssd_plane ( plane[0][i], plane[1][i], w*h>>2*!!i );
+            ssim[i] += ssim_plane( plane[0][i], w>>!!i,
+                                   plane[1][i], w>>!!i,
+                                   w>>!!i, h>>!!i, temp, NULL );
+        }
+    }
+
+    if( !frames ) return 0;
+
+    printf( "PSNR Y:%.3f  U:%.3f  V:%.3f  All:%.3f\n",
+            ssd_to_psnr( ssd[0], (uint64_t)frames*w*h ),
+            ssd_to_psnr( ssd[1], (uint64_t)frames*w*h/4 ),
+            ssd_to_psnr( ssd[2], (uint64_t)frames*w*h/4 ),
+            ssd_to_psnr( ssd[0] + ssd[1] + ssd[2], (uint64_t)frames*w*h*3/2 ) );
+    printf( "SSIM Y:%.5f U:%.5f V:%.5f All:%.5f\n",
+            ssim[0] / frames,
+            ssim[1] / frames,
+            ssim[2] / frames,
+            (ssim[0]*4 + ssim[1] + ssim[2]) / (frames*6) );
+
+    return 0;
+}