/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 *
 *  This code was originally written by: Nathan E. Egge, at the Daala
 *  project.
 */
137bc9febe8749e98a3812a0dc4380ceae75c29450Johann#include <assert.h>
14da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include <math.h>
15da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include <stdlib.h>
16da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include <string.h>
17da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "./vpx_config.h"
18da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "./vpx_dsp_rtcd.h"
19da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/ssim.h"
20da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_ports/system_state.h"
217bc9febe8749e98a3812a0dc4380ceae75c29450Johann
/* Forward declarations of the per-level and whole-pyramid state; the
 * structures themselves are defined further down in this file. */
typedef struct fs_ctx fs_ctx;
typedef struct fs_level fs_level;
24da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
/*
 * SSIM stabilising constants.  Each one is (peak * peak * k * k), evaluated
 * left to right, with k = 0.01 for the C1 family and k = 0.03 for the C2
 * family.  The peak is 255 for the default constants, and 1023 / 4095 for
 * the _10 / _12 variants that exist only in high-bit-depth builds.
 */
#define FS_SSIM_CONST_(_peak, _k) ((_peak) * (_peak) * (_k) * (_k))

#define SSIM_C1 FS_SSIM_CONST_(255, 0.01)
#define SSIM_C2 FS_SSIM_CONST_(255, 0.03)
#if CONFIG_VP9_HIGHBITDEPTH
#define SSIM_C1_10 FS_SSIM_CONST_(1023, 0.01)
#define SSIM_C1_12 FS_SSIM_CONST_(4095, 0.01)
#define SSIM_C2_10 FS_SSIM_CONST_(1023, 0.03)
#define SSIM_C2_12 FS_SSIM_CONST_(4095, 0.03)
#endif

/* Minimum / maximum of two values.  These are plain macros: the selected
 * argument is evaluated twice, so do not pass expressions with side
 * effects. */
#define FS_MINI(_a, _b) ((_a) < (_b) ? (_a) : (_b))
#define FS_MAXI(_a, _b) ((_a) > (_b) ? (_a) : (_b))
35da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
/* One resolution level of the pyramid. */
struct fs_level {
  /* Downsampled samples of the first image, w * h entries, row-major. */
  uint32_t *im1;
  /* Downsampled samples of the second image, same layout as im1. */
  uint32_t *im2;
  /* Per-sample similarity map for this level, w * h entries. */
  double *ssim;
  /* Width of this level in samples. */
  int w;
  /* Height of this level in samples. */
  int h;
};
43da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
44da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstruct fs_ctx {
45da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  fs_level *level;
46da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int nlevels;
47da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  unsigned *col_buf;
48da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian};
49da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
50da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void fs_ctx_init(fs_ctx *_ctx, int _w, int _h, int _nlevels) {
51da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  unsigned char *data;
52da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  size_t data_size;
53da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int lw;
54da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int lh;
55da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int l;
56da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  lw = (_w + 1) >> 1;
57da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  lh = (_h + 1) >> 1;
587bc9febe8749e98a3812a0dc4380ceae75c29450Johann  data_size =
597bc9febe8749e98a3812a0dc4380ceae75c29450Johann      _nlevels * sizeof(fs_level) + 2 * (lw + 8) * 8 * sizeof(*_ctx->col_buf);
60da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  for (l = 0; l < _nlevels; l++) {
61da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    size_t im_size;
62da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    size_t level_size;
637bc9febe8749e98a3812a0dc4380ceae75c29450Johann    im_size = lw * (size_t)lh;
64da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    level_size = 2 * im_size * sizeof(*_ctx->level[l].im1);
65da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    level_size += sizeof(*_ctx->level[l].ssim) - 1;
66da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    level_size /= sizeof(*_ctx->level[l].ssim);
67da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    level_size += im_size;
68da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    level_size *= sizeof(*_ctx->level[l].ssim);
69da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    data_size += level_size;
70da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    lw = (lw + 1) >> 1;
71da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    lh = (lh + 1) >> 1;
72da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
737bc9febe8749e98a3812a0dc4380ceae75c29450Johann  data = (unsigned char *)malloc(data_size);
747bc9febe8749e98a3812a0dc4380ceae75c29450Johann  _ctx->level = (fs_level *)data;
75da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  _ctx->nlevels = _nlevels;
76da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  data += _nlevels * sizeof(*_ctx->level);
77da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  lw = (_w + 1) >> 1;
78da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  lh = (_h + 1) >> 1;
79da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  for (l = 0; l < _nlevels; l++) {
80da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    size_t im_size;
81da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    size_t level_size;
82da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    _ctx->level[l].w = lw;
83da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    _ctx->level[l].h = lh;
847bc9febe8749e98a3812a0dc4380ceae75c29450Johann    im_size = lw * (size_t)lh;
85da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    level_size = 2 * im_size * sizeof(*_ctx->level[l].im1);
86da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    level_size += sizeof(*_ctx->level[l].ssim) - 1;
87da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    level_size /= sizeof(*_ctx->level[l].ssim);
88da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    level_size *= sizeof(*_ctx->level[l].ssim);
897bc9febe8749e98a3812a0dc4380ceae75c29450Johann    _ctx->level[l].im1 = (uint32_t *)data;
90da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    _ctx->level[l].im2 = _ctx->level[l].im1 + im_size;
91da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    data += level_size;
927bc9febe8749e98a3812a0dc4380ceae75c29450Johann    _ctx->level[l].ssim = (double *)data;
93da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    data += im_size * sizeof(*_ctx->level[l].ssim);
94da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    lw = (lw + 1) >> 1;
95da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    lh = (lh + 1) >> 1;
96da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
977bc9febe8749e98a3812a0dc4380ceae75c29450Johann  _ctx->col_buf = (unsigned *)data;
98da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
99da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1007bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic void fs_ctx_clear(fs_ctx *_ctx) { free(_ctx->level); }
101da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
102da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void fs_downsample_level(fs_ctx *_ctx, int _l) {
1037bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const uint32_t *src1;
1047bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const uint32_t *src2;
1057bc9febe8749e98a3812a0dc4380ceae75c29450Johann  uint32_t *dst1;
1067bc9febe8749e98a3812a0dc4380ceae75c29450Johann  uint32_t *dst2;
107da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int w2;
108da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int h2;
109da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int w;
110da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int h;
111da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int i;
112da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int j;
113da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  w = _ctx->level[_l].w;
114da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  h = _ctx->level[_l].h;
115da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  dst1 = _ctx->level[_l].im1;
116da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  dst2 = _ctx->level[_l].im2;
117da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  w2 = _ctx->level[_l - 1].w;
118da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  h2 = _ctx->level[_l - 1].h;
119da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  src1 = _ctx->level[_l - 1].im1;
120da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  src2 = _ctx->level[_l - 1].im2;
121da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  for (j = 0; j < h; j++) {
122da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    int j0offs;
123da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    int j1offs;
124da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    j0offs = 2 * j * w2;
125da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    j1offs = FS_MINI(2 * j + 1, h2) * w2;
126da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    for (i = 0; i < w; i++) {
127da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      int i0;
128da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      int i1;
129da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      i0 = 2 * i;
130da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      i1 = FS_MINI(i0 + 1, w2);
1317bc9febe8749e98a3812a0dc4380ceae75c29450Johann      dst1[j * w + i] = src1[j0offs + i0] + src1[j0offs + i1] +
1327bc9febe8749e98a3812a0dc4380ceae75c29450Johann                        src1[j1offs + i0] + src1[j1offs + i1];
1337bc9febe8749e98a3812a0dc4380ceae75c29450Johann      dst2[j * w + i] = src2[j0offs + i0] + src2[j0offs + i1] +
1347bc9febe8749e98a3812a0dc4380ceae75c29450Johann                        src2[j1offs + i0] + src2[j1offs + i1];
135da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
136da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
137da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
138da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1397bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic void fs_downsample_level0(fs_ctx *_ctx, const uint8_t *_src1,
1407bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                 int _s1ystride, const uint8_t *_src2,
1417bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                 int _s2ystride, int _w, int _h, uint32_t bd,
1427bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                 uint32_t shift) {
1437bc9febe8749e98a3812a0dc4380ceae75c29450Johann  uint32_t *dst1;
1447bc9febe8749e98a3812a0dc4380ceae75c29450Johann  uint32_t *dst2;
145da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int w;
146da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int h;
147da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int i;
148da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int j;
149da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  w = _ctx->level[0].w;
150da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  h = _ctx->level[0].h;
151da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  dst1 = _ctx->level[0].im1;
152da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  dst2 = _ctx->level[0].im2;
153da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  for (j = 0; j < h; j++) {
154da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    int j0;
155da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    int j1;
156da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    j0 = 2 * j;
157da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    j1 = FS_MINI(j0 + 1, _h);
158da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    for (i = 0; i < w; i++) {
159da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      int i0;
160da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      int i1;
161da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      i0 = 2 * i;
162da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      i1 = FS_MINI(i0 + 1, _w);
1637bc9febe8749e98a3812a0dc4380ceae75c29450Johann      if (bd == 8 && shift == 0) {
1647bc9febe8749e98a3812a0dc4380ceae75c29450Johann        dst1[j * w + i] =
1657bc9febe8749e98a3812a0dc4380ceae75c29450Johann            _src1[j0 * _s1ystride + i0] + _src1[j0 * _s1ystride + i1] +
1667bc9febe8749e98a3812a0dc4380ceae75c29450Johann            _src1[j1 * _s1ystride + i0] + _src1[j1 * _s1ystride + i1];
1677bc9febe8749e98a3812a0dc4380ceae75c29450Johann        dst2[j * w + i] =
1687bc9febe8749e98a3812a0dc4380ceae75c29450Johann            _src2[j0 * _s2ystride + i0] + _src2[j0 * _s2ystride + i1] +
1697bc9febe8749e98a3812a0dc4380ceae75c29450Johann            _src2[j1 * _s2ystride + i0] + _src2[j1 * _s2ystride + i1];
1707bc9febe8749e98a3812a0dc4380ceae75c29450Johann      } else {
1717bc9febe8749e98a3812a0dc4380ceae75c29450Johann        uint16_t *src1s = CONVERT_TO_SHORTPTR(_src1);
1727bc9febe8749e98a3812a0dc4380ceae75c29450Johann        uint16_t *src2s = CONVERT_TO_SHORTPTR(_src2);
1737bc9febe8749e98a3812a0dc4380ceae75c29450Johann        dst1[j * w + i] = (src1s[j0 * _s1ystride + i0] >> shift) +
1747bc9febe8749e98a3812a0dc4380ceae75c29450Johann                          (src1s[j0 * _s1ystride + i1] >> shift) +
1757bc9febe8749e98a3812a0dc4380ceae75c29450Johann                          (src1s[j1 * _s1ystride + i0] >> shift) +
1767bc9febe8749e98a3812a0dc4380ceae75c29450Johann                          (src1s[j1 * _s1ystride + i1] >> shift);
1777bc9febe8749e98a3812a0dc4380ceae75c29450Johann        dst2[j * w + i] = (src2s[j0 * _s2ystride + i0] >> shift) +
1787bc9febe8749e98a3812a0dc4380ceae75c29450Johann                          (src2s[j0 * _s2ystride + i1] >> shift) +
1797bc9febe8749e98a3812a0dc4380ceae75c29450Johann                          (src2s[j1 * _s2ystride + i0] >> shift) +
1807bc9febe8749e98a3812a0dc4380ceae75c29450Johann                          (src2s[j1 * _s2ystride + i1] >> shift);
1817bc9febe8749e98a3812a0dc4380ceae75c29450Johann      }
182da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
183da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
184da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
185da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1867bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) {
187da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  unsigned *col_sums_x;
188da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  unsigned *col_sums_y;
1897bc9febe8749e98a3812a0dc4380ceae75c29450Johann  uint32_t *im1;
1907bc9febe8749e98a3812a0dc4380ceae75c29450Johann  uint32_t *im2;
191da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  double *ssim;
192da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  double c1;
193da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int w;
194da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int h;
195da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int j0offs;
196da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int j1offs;
197da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int i;
198da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int j;
1997bc9febe8749e98a3812a0dc4380ceae75c29450Johann  double ssim_c1 = SSIM_C1;
2007bc9febe8749e98a3812a0dc4380ceae75c29450Johann#if CONFIG_VP9_HIGHBITDEPTH
2017bc9febe8749e98a3812a0dc4380ceae75c29450Johann  if (bit_depth == 10) ssim_c1 = SSIM_C1_10;
2027bc9febe8749e98a3812a0dc4380ceae75c29450Johann  if (bit_depth == 12) ssim_c1 = SSIM_C1_12;
2037bc9febe8749e98a3812a0dc4380ceae75c29450Johann#else
2047bc9febe8749e98a3812a0dc4380ceae75c29450Johann  assert(bit_depth == 8);
2057bc9febe8749e98a3812a0dc4380ceae75c29450Johann  (void)bit_depth;
2067bc9febe8749e98a3812a0dc4380ceae75c29450Johann#endif
207da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  w = _ctx->level[_l].w;
208da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  h = _ctx->level[_l].h;
209da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  col_sums_x = _ctx->col_buf;
210da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  col_sums_y = col_sums_x + w;
211da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  im1 = _ctx->level[_l].im1;
212da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  im2 = _ctx->level[_l].im2;
2137bc9febe8749e98a3812a0dc4380ceae75c29450Johann  for (i = 0; i < w; i++) col_sums_x[i] = 5 * im1[i];
2147bc9febe8749e98a3812a0dc4380ceae75c29450Johann  for (i = 0; i < w; i++) col_sums_y[i] = 5 * im2[i];
215da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  for (j = 1; j < 4; j++) {
216da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    j1offs = FS_MINI(j, h - 1) * w;
2177bc9febe8749e98a3812a0dc4380ceae75c29450Johann    for (i = 0; i < w; i++) col_sums_x[i] += im1[j1offs + i];
2187bc9febe8749e98a3812a0dc4380ceae75c29450Johann    for (i = 0; i < w; i++) col_sums_y[i] += im2[j1offs + i];
219da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
220da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  ssim = _ctx->level[_l].ssim;
2217bc9febe8749e98a3812a0dc4380ceae75c29450Johann  c1 = (double)(ssim_c1 * 4096 * (1 << 4 * _l));
222da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  for (j = 0; j < h; j++) {
223da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    unsigned mux;
224da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    unsigned muy;
225da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    int i0;
226da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    int i1;
227da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    mux = 5 * col_sums_x[0];
228da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    muy = 5 * col_sums_y[0];
229da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    for (i = 1; i < 4; i++) {
230da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      i1 = FS_MINI(i, w - 1);
231da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      mux += col_sums_x[i1];
232da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      muy += col_sums_y[i1];
233da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
234da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    for (i = 0; i < w; i++) {
2357bc9febe8749e98a3812a0dc4380ceae75c29450Johann      ssim[j * w + i] *= (2 * mux * (double)muy + c1) /
2367bc9febe8749e98a3812a0dc4380ceae75c29450Johann                         (mux * (double)mux + muy * (double)muy + c1);
237da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      if (i + 1 < w) {
238da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        i0 = FS_MAXI(0, i - 4);
239da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        i1 = FS_MINI(i + 4, w - 1);
240da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        mux += col_sums_x[i1] - col_sums_x[i0];
241da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        muy += col_sums_x[i1] - col_sums_x[i0];
242da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      }
243da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
244da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    if (j + 1 < h) {
245da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      j0offs = FS_MAXI(0, j - 4) * w;
2467bc9febe8749e98a3812a0dc4380ceae75c29450Johann      for (i = 0; i < w; i++) col_sums_x[i] -= im1[j0offs + i];
2477bc9febe8749e98a3812a0dc4380ceae75c29450Johann      for (i = 0; i < w; i++) col_sums_y[i] -= im2[j0offs + i];
248da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      j1offs = FS_MINI(j + 4, h - 1) * w;
2497bc9febe8749e98a3812a0dc4380ceae75c29450Johann      for (i = 0; i < w; i++) col_sums_x[i] += im1[j1offs + i];
2507bc9febe8749e98a3812a0dc4380ceae75c29450Johann      for (i = 0; i < w; i++) col_sums_y[i] += im2[j1offs + i];
251da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
252da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
253da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
254da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
/*
 * Column-sum helpers.  These are textual macros, not functions: every one
 * of them expects the arrays col_sums_gx2, col_sums_gy2 and col_sums_gxgy
 * to be in scope, and SET/ADD/SUB additionally expect gx_buf, gy_buf,
 * stride, i and j.
 */

/*
 * Shared body of FS_COL_SET / FS_COL_ADD / FS_COL_SUB.  Reads one gx and
 * one gy sample from row ((j + _joffs) & 7) (the row index wraps modulo 8)
 * and column (i + _ioffs), then applies the assignment operator _op to
 * entry _col of the three sums using gx*gx, gy*gy and gx*gy, computed in
 * double precision.
 */
#define FS_COL_APPLY_(_col, _joffs, _ioffs, _op)               \
  do {                                                         \
    unsigned gx;                                               \
    unsigned gy;                                               \
    gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
    gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
    col_sums_gx2[(_col)] _op gx * (double)gx;                  \
    col_sums_gy2[(_col)] _op gy * (double)gy;                  \
    col_sums_gxgy[(_col)] _op gx * (double)gy;                 \
  } while (0)

/* Overwrite column _col with the products of the addressed sample. */
#define FS_COL_SET(_col, _joffs, _ioffs) \
  FS_COL_APPLY_(_col, _joffs, _ioffs, =)

/* Add the products of the addressed sample to column _col. */
#define FS_COL_ADD(_col, _joffs, _ioffs) \
  FS_COL_APPLY_(_col, _joffs, _ioffs, +=)

/* Subtract the products of the addressed sample from column _col. */
#define FS_COL_SUB(_col, _joffs, _ioffs) \
  FS_COL_APPLY_(_col, _joffs, _ioffs, -=)

/* Copy all three sums of column _col2 into column _col1. */
#define FS_COL_COPY(_col1, _col2)                    \
  do {                                               \
    col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)];   \
    col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)];   \
    col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)]; \
  } while (0)

/* Shared body of FS_COL_HALVE / FS_COL_DOUBLE: column _col1 receives
 * column _col2 multiplied by _factor. */
#define FS_COL_SCALE_(_col1, _col2, _factor)                     \
  do {                                                           \
    col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)] * _factor;     \
    col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)] * _factor;     \
    col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)] * _factor;   \
  } while (0)

/* Column _col1 = half of column _col2. */
#define FS_COL_HALVE(_col1, _col2) FS_COL_SCALE_(_col1, _col2, 0.5)

/* Column _col1 = twice column _col2. */
#define FS_COL_DOUBLE(_col1, _col2) FS_COL_SCALE_(_col1, _col2, 2)
308da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
3097bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic void fs_calc_structure(fs_ctx *_ctx, int _l, int bit_depth) {
3107bc9febe8749e98a3812a0dc4380ceae75c29450Johann  uint32_t *im1;
3117bc9febe8749e98a3812a0dc4380ceae75c29450Johann  uint32_t *im2;
312da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  unsigned *gx_buf;
313da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  unsigned *gy_buf;
314da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  double *ssim;
315da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  double col_sums_gx2[8];
316da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  double col_sums_gy2[8];
317da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  double col_sums_gxgy[8];
318da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  double c2;
319da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int stride;
320da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int w;
321da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int h;
322da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int i;
323da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int j;
3247bc9febe8749e98a3812a0dc4380ceae75c29450Johann  double ssim_c2 = SSIM_C2;
3257bc9febe8749e98a3812a0dc4380ceae75c29450Johann#if CONFIG_VP9_HIGHBITDEPTH
3267bc9febe8749e98a3812a0dc4380ceae75c29450Johann  if (bit_depth == 10) ssim_c2 = SSIM_C2_10;
3277bc9febe8749e98a3812a0dc4380ceae75c29450Johann  if (bit_depth == 12) ssim_c2 = SSIM_C2_12;
3287bc9febe8749e98a3812a0dc4380ceae75c29450Johann#else
3297bc9febe8749e98a3812a0dc4380ceae75c29450Johann  assert(bit_depth == 8);
3307bc9febe8749e98a3812a0dc4380ceae75c29450Johann  (void)bit_depth;
3317bc9febe8749e98a3812a0dc4380ceae75c29450Johann#endif
3327bc9febe8749e98a3812a0dc4380ceae75c29450Johann
333da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  w = _ctx->level[_l].w;
334da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  h = _ctx->level[_l].h;
335da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  im1 = _ctx->level[_l].im1;
336da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  im2 = _ctx->level[_l].im2;
337da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  ssim = _ctx->level[_l].ssim;
338da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  gx_buf = _ctx->col_buf;
339da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  stride = w + 8;
340da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  gy_buf = gx_buf + 8 * stride;
341da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  memset(gx_buf, 0, 2 * 8 * stride * sizeof(*gx_buf));
3427bc9febe8749e98a3812a0dc4380ceae75c29450Johann  c2 = ssim_c2 * (1 << 4 * _l) * 16 * 104;
343da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  for (j = 0; j < h + 4; j++) {
344da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    if (j < h - 1) {
345da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      for (i = 0; i < w - 1; i++) {
346da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        unsigned g1;
347da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        unsigned g2;
348da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        unsigned gx;
349da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        unsigned gy;
3507bc9febe8749e98a3812a0dc4380ceae75c29450Johann        g1 = abs((int)im1[(j + 1) * w + i + 1] - (int)im1[j * w + i]);
3517bc9febe8749e98a3812a0dc4380ceae75c29450Johann        g2 = abs((int)im1[(j + 1) * w + i] - (int)im1[j * w + i + 1]);
352da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        gx = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2);
3537bc9febe8749e98a3812a0dc4380ceae75c29450Johann        g1 = abs((int)im2[(j + 1) * w + i + 1] - (int)im2[j * w + i]);
3547bc9febe8749e98a3812a0dc4380ceae75c29450Johann        g2 = abs((int)im2[(j + 1) * w + i] - (int)im2[j * w + i + 1]);
355da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        gy = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2);
356da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        gx_buf[(j & 7) * stride + i + 4] = gx;
357da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        gy_buf[(j & 7) * stride + i + 4] = gy;
358da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      }
359da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    } else {
360da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      memset(gx_buf + (j & 7) * stride, 0, stride * sizeof(*gx_buf));
361da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      memset(gy_buf + (j & 7) * stride, 0, stride * sizeof(*gy_buf));
362da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
363da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    if (j >= 4) {
364da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      int k;
365da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      col_sums_gx2[3] = col_sums_gx2[2] = col_sums_gx2[1] = col_sums_gx2[0] = 0;
366da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      col_sums_gy2[3] = col_sums_gy2[2] = col_sums_gy2[1] = col_sums_gy2[0] = 0;
367da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      col_sums_gxgy[3] = col_sums_gxgy[2] = col_sums_gxgy[1] =
368da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          col_sums_gxgy[0] = 0;
369da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      for (i = 4; i < 8; i++) {
370da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        FS_COL_SET(i, -1, 0);
371da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        FS_COL_ADD(i, 0, 0);
372da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        for (k = 1; k < 8 - i; k++) {
373da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_DOUBLE(i, i);
374da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_ADD(i, -k - 1, 0);
375da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_ADD(i, k, 0);
376da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        }
377da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      }
378da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      for (i = 0; i < w; i++) {
379da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        double mugx2;
380da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        double mugy2;
381da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        double mugxgy;
382da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        mugx2 = col_sums_gx2[0];
3837bc9febe8749e98a3812a0dc4380ceae75c29450Johann        for (k = 1; k < 8; k++) mugx2 += col_sums_gx2[k];
384da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        mugy2 = col_sums_gy2[0];
3857bc9febe8749e98a3812a0dc4380ceae75c29450Johann        for (k = 1; k < 8; k++) mugy2 += col_sums_gy2[k];
386da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        mugxgy = col_sums_gxgy[0];
3877bc9febe8749e98a3812a0dc4380ceae75c29450Johann        for (k = 1; k < 8; k++) mugxgy += col_sums_gxgy[k];
388da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        ssim[(j - 4) * w + i] = (2 * mugxgy + c2) / (mugx2 + mugy2 + c2);
389da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        if (i + 1 < w) {
390da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_SET(0, -1, 1);
391da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_ADD(0, 0, 1);
392da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_SUB(2, -3, 2);
393da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_SUB(2, 2, 2);
394da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_HALVE(1, 2);
395da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_SUB(3, -4, 3);
396da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_SUB(3, 3, 3);
397da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_HALVE(2, 3);
398da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_COPY(3, 4);
399da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_DOUBLE(4, 5);
400da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_ADD(4, -4, 5);
401da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_ADD(4, 3, 5);
402da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_DOUBLE(5, 6);
403da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_ADD(5, -3, 6);
404da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_ADD(5, 2, 6);
405da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_DOUBLE(6, 7);
406da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_ADD(6, -2, 7);
407da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_ADD(6, 1, 7);
408da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_SET(7, -1, 8);
409da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian          FS_COL_ADD(7, 0, 8);
410da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        }
411da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      }
412da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
413da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
414da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
415da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
/*Number of scales handed to fs_ctx_init() and walked by calc_ssim().*/
#define FS_NLEVELS (4)

/*Per-level exponents used by fs_average().
  They come from the default five-scale weights in Wang's original Matlab
   implementation: {0.0448, 0.2856, 0.3001, 0.2363, 0.1333}.
  The finest scale (0.0448) is dropped and the remaining four are renormalized
   so that they sum to 1.
  Each stored value is a multiple of 2^-16 and the four add up to exactly 1.*/
static const double FS_WEIGHTS[FS_NLEVELS] = {
  0.2989654541015625, /*19593/65536, from 0.2856*/
  0.3141326904296875, /*20587/65536, from 0.3001*/
  0.2473602294921875, /*16211/65536, from 0.2363*/
  0.1395416259765625  /* 9145/65536, from 0.1333*/
};
425da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
426da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic double fs_average(fs_ctx *_ctx, int _l) {
427da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  double *ssim;
428da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  double ret;
429da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int w;
430da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int h;
431da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int i;
432da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int j;
433da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  w = _ctx->level[_l].w;
434da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  h = _ctx->level[_l].h;
435da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  ssim = _ctx->level[_l].ssim;
436da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  ret = 0;
437da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  for (j = 0; j < h; j++)
4387bc9febe8749e98a3812a0dc4380ceae75c29450Johann    for (i = 0; i < w; i++) ret += ssim[j * w + i];
439da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  return pow(ret / (w * h), FS_WEIGHTS[_l]);
440da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
441da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
4427bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic double convert_ssim_db(double _ssim, double _weight) {
4437bc9febe8749e98a3812a0dc4380ceae75c29450Johann  assert(_weight >= _ssim);
4447bc9febe8749e98a3812a0dc4380ceae75c29450Johann  if ((_weight - _ssim) < 1e-10) return MAX_SSIM_DB;
4457bc9febe8749e98a3812a0dc4380ceae75c29450Johann  return 10 * (log10(_weight) - log10(_weight - _ssim));
4467bc9febe8749e98a3812a0dc4380ceae75c29450Johann}
4477bc9febe8749e98a3812a0dc4380ceae75c29450Johann
4487bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic double calc_ssim(const uint8_t *_src, int _systride, const uint8_t *_dst,
4497bc9febe8749e98a3812a0dc4380ceae75c29450Johann                        int _dystride, int _w, int _h, uint32_t _bd,
4507bc9febe8749e98a3812a0dc4380ceae75c29450Johann                        uint32_t _shift) {
451da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  fs_ctx ctx;
452da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  double ret;
453da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int l;
454da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  ret = 1;
455da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  fs_ctx_init(&ctx, _w, _h, FS_NLEVELS);
4567bc9febe8749e98a3812a0dc4380ceae75c29450Johann  fs_downsample_level0(&ctx, _src, _systride, _dst, _dystride, _w, _h, _bd,
4577bc9febe8749e98a3812a0dc4380ceae75c29450Johann                       _shift);
458da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  for (l = 0; l < FS_NLEVELS - 1; l++) {
4597bc9febe8749e98a3812a0dc4380ceae75c29450Johann    fs_calc_structure(&ctx, l, _bd);
460da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    ret *= fs_average(&ctx, l);
461da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    fs_downsample_level(&ctx, l + 1);
462da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
4637bc9febe8749e98a3812a0dc4380ceae75c29450Johann  fs_calc_structure(&ctx, l, _bd);
4647bc9febe8749e98a3812a0dc4380ceae75c29450Johann  fs_apply_luminance(&ctx, l, _bd);
465da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  ret *= fs_average(&ctx, l);
466da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  fs_ctx_clear(&ctx);
467da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  return ret;
468da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
469da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
470da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramaniandouble vpx_calc_fastssim(const YV12_BUFFER_CONFIG *source,
4717bc9febe8749e98a3812a0dc4380ceae75c29450Johann                         const YV12_BUFFER_CONFIG *dest, double *ssim_y,
4727bc9febe8749e98a3812a0dc4380ceae75c29450Johann                         double *ssim_u, double *ssim_v, uint32_t bd,
4737bc9febe8749e98a3812a0dc4380ceae75c29450Johann                         uint32_t in_bd) {
474da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  double ssimv;
4757bc9febe8749e98a3812a0dc4380ceae75c29450Johann  uint32_t bd_shift = 0;
476da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  vpx_clear_system_state();
4777bc9febe8749e98a3812a0dc4380ceae75c29450Johann  assert(bd >= in_bd);
4787bc9febe8749e98a3812a0dc4380ceae75c29450Johann  bd_shift = bd - in_bd;
479da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
480da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  *ssim_y = calc_ssim(source->y_buffer, source->y_stride, dest->y_buffer,
481da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian                      dest->y_stride, source->y_crop_width,
4827bc9febe8749e98a3812a0dc4380ceae75c29450Johann                      source->y_crop_height, in_bd, bd_shift);
483da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  *ssim_u = calc_ssim(source->u_buffer, source->uv_stride, dest->u_buffer,
484da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian                      dest->uv_stride, source->uv_crop_width,
4857bc9febe8749e98a3812a0dc4380ceae75c29450Johann                      source->uv_crop_height, in_bd, bd_shift);
486da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  *ssim_v = calc_ssim(source->v_buffer, source->uv_stride, dest->v_buffer,
487da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian                      dest->uv_stride, source->uv_crop_width,
4887bc9febe8749e98a3812a0dc4380ceae75c29450Johann                      source->uv_crop_height, in_bd, bd_shift);
489da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
4907bc9febe8749e98a3812a0dc4380ceae75c29450Johann  ssimv = (*ssim_y) * .8 + .1 * ((*ssim_u) + (*ssim_v));
491da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  return convert_ssim_db(ssimv, 1.0);
492da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
493