1e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com/* 2e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com * Copyright 2013 The LibYuv Project Authors. All rights reserved. 3e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com * 4e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com * Use of this source code is governed by a BSD-style license 5e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com * that can be found in the LICENSE file in the root of the source 6e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com * tree. An additional intellectual property rights grant can be found 7e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com * in the file PATENTS. All contributing project authors may 8e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com * be found in the AUTHORS file in the root of the source tree. 9e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com */ 10e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 116da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com#include "../util/ssim.h" // NOLINT 12e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 13e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#include <string.h> 14e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 1533d34eaad81ca9b3e05055f5a669c7ec3cd3fe61fbarchard@google.com#ifdef __cplusplus 1633d34eaad81ca9b3e05055f5a669c7ec3cd3fe61fbarchard@google.comextern "C" { 1733d34eaad81ca9b3e05055f5a669c7ec3cd3fe61fbarchard@google.com#endif 1833d34eaad81ca9b3e05055f5a669c7ec3cd3fe61fbarchard@google.com 19e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.comtypedef unsigned int uint32; // NOLINT 20e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.comtypedef unsigned short uint16; // NOLINT 21e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 221a8c791611e99c8d61c80bb399fa4bb6df4f661ffbarchard@google.com#if !defined(LIBYUV_DISABLE_X86) && !defined(__SSE2__) && \ 23b432b7da2d8dd78db1fb4621f1be1a3c12d49cb2fbarchard@google.com (defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2))) 24e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#define __SSE2__ 25e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#endif 26b432b7da2d8dd78db1fb4621f1be1a3c12d49cb2fbarchard@google.com#if !defined(LIBYUV_DISABLE_X86) && defined(__SSE2__) 27e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#include <emmintrin.h> 28e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#endif 29e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 30e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#ifdef _OPENMP 31e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#include <omp.h> 32e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#endif 33e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 34e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com// SSIM 35e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.comenum { KERNEL = 3, KERNEL_SIZE = 2 * KERNEL + 1 }; 36e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 37e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com// Symmetric Gaussian kernel: K[i] = ~11 * exp(-0.3 * i * i) 38e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com// The maximum value (11 x 11) must be less than 128 to avoid sign 39e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com// problems during the calls to _mm_mullo_epi16(). 40e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.comstatic const int K[KERNEL_SIZE] = { 41e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 1, 3, 7, 11, 7, 3, 1 // ~11 * exp(-0.3 * i * i) 42e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com}; 43e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.comstatic const double kiW[KERNEL + 1 + 1] = { 44e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 1. / 1089., // 1 / sum(i:0..6, j..6) K[i]*K[j] 45e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 1. / 1089., // 1 / sum(i:0..6, j..6) K[i]*K[j] 46e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 1. / 1056., // 1 / sum(i:0..5, j..6) K[i]*K[j] 47e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 1. / 957., // 1 / sum(i:0..4, j..6) K[i]*K[j] 48e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 1. / 726., // 1 / sum(i:0..3, j..6) K[i]*K[j] 49e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com}; 50e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 51b432b7da2d8dd78db1fb4621f1be1a3c12d49cb2fbarchard@google.com#if !defined(LIBYUV_DISABLE_X86) && defined(__SSE2__) 52e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 53e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#define PWEIGHT(A, B) static_cast<uint16>(K[(A)] * K[(B)]) // weight product 54e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#define MAKE_WEIGHT(L) \ 55e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com { { { PWEIGHT(L, 0), PWEIGHT(L, 1), PWEIGHT(L, 2), PWEIGHT(L, 3), \ 56e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com PWEIGHT(L, 4), PWEIGHT(L, 5), PWEIGHT(L, 6), 0 } } } 57e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 58e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com// We need this union trick to be able to initialize constant static __m128i 59e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com// values. We can't call _mm_set_epi16() for static compile-time initialization. 60e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.comstatic const struct { 61e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com union { 62e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com uint16 i16_[8]; 63e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com __m128i m_; 64e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } values_; 65e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com} W0 = MAKE_WEIGHT(0), 66e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com W1 = MAKE_WEIGHT(1), 67e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com W2 = MAKE_WEIGHT(2), 68e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com W3 = MAKE_WEIGHT(3); 69e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // ... the rest is symmetric. 70e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#undef MAKE_WEIGHT 71e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#undef PWEIGHT 72e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#endif 73e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 74e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com// Common final expression for SSIM, once the weighted sums are known. 75e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.comstatic double FinalizeSSIM(double iw, double xm, double ym, 76e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com double xxm, double xym, double yym) { 77e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const double iwx = xm * iw; 78e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const double iwy = ym * iw; 79e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com double sxx = xxm * iw - iwx * iwx; 80e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com double syy = yym * iw - iwy * iwy; 81e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // small errors are possible, due to rounding. Clamp to zero. 82e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com if (sxx < 0.) sxx = 0.; 83e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com if (syy < 0.) syy = 0.; 84e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const double sxsy = sqrt(sxx * syy); 85e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const double sxy = xym * iw - iwx * iwy; 86e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com static const double C11 = (0.01 * 0.01) * (255 * 255); 87e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com static const double C22 = (0.03 * 0.03) * (255 * 255); 88e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com static const double C33 = (0.015 * 0.015) * (255 * 255); 89e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const double l = (2. * iwx * iwy + C11) / (iwx * iwx + iwy * iwy + C11); 90e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const double c = (2. * sxsy + C22) / (sxx + syy + C22); 91e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const double s = (sxy + C33) / (sxsy + C33); 92e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com return l * c * s; 93e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com} 94e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 95e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com// GetSSIM() does clipping. GetSSIMFullKernel() does not 96e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 97e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com// TODO(skal): use summed tables? 98e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com// Note: worst case of accumulation is a weight of 33 = 11 + 2 * (7 + 3 + 1) 99e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com// with a diff of 255, squared. The maximum error is thus 0x4388241, 100e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com// which fits into 32 bits integers. 101e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.comdouble GetSSIM(const uint8 *org, const uint8 *rec, 102e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com int xo, int yo, int W, int H, int stride) { 103e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com uint32 ws = 0, xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0; 104e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com org += (yo - KERNEL) * stride; 105e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com org += (xo - KERNEL); 106e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com rec += (yo - KERNEL) * stride; 107e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com rec += (xo - KERNEL); 108e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com for (int y_ = 0; y_ < KERNEL_SIZE; ++y_, org += stride, rec += stride) { 109e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com if (((yo - KERNEL + y_) < 0) || ((yo - KERNEL + y_) >= H)) continue; 110e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int Wy = K[y_]; 111e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com for (int x_ = 0; x_ < KERNEL_SIZE; ++x_) { 112e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int Wxy = Wy * K[x_]; 113e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com if (((xo - KERNEL + x_) >= 0) && ((xo - KERNEL + x_) < W)) { 114e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int org_x = org[x_]; 115e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int rec_x = rec[x_]; 116e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com ws += Wxy; 117e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com xm += Wxy * org_x; 118e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com ym += Wxy * rec_x; 119e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com xxm += Wxy * org_x * org_x; 120e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com xym += Wxy * org_x * rec_x; 121e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com yym += Wxy * rec_x * rec_x; 122e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } 123e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } 124e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } 125e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com return FinalizeSSIM(1. / ws, xm, ym, xxm, xym, yym); 126e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com} 127e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 128e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.comdouble GetSSIMFullKernel(const uint8 *org, const uint8 *rec, 129e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com int xo, int yo, int stride, 130e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com double area_weight) { 131e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com uint32 xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0; 132e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 133b432b7da2d8dd78db1fb4621f1be1a3c12d49cb2fbarchard@google.com#if defined(LIBYUV_DISABLE_X86) || !defined(__SSE2__) 134e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 135e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com org += yo * stride + xo; 136e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com rec += yo * stride + xo; 137e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com for (int y = 1; y <= KERNEL; y++) { 138e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int dy1 = y * stride; 139e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int dy2 = y * stride; 140e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int Wy = K[KERNEL + y]; 141e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 142e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com for (int x = 1; x <= KERNEL; x++) { 143e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // Compute the contributions of upper-left (ul), upper-right (ur) 144e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // lower-left (ll) and lower-right (lr) points (see the diagram below). 145e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // Symmetric Kernel will have same weight on those points. 146e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // - - - - - - - 147e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // - ul - - - ur - 148e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // - - - - - - - 149e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // - - - 0 - - - 150e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // - - - - - - - 151e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // - ll - - - lr - 152e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // - - - - - - - 153e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int Wxy = Wy * K[KERNEL + x]; 154e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int ul1 = org[-dy1 - x]; 155e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int ur1 = org[-dy1 + x]; 156e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int ll1 = org[dy1 - x]; 157e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int lr1 = org[dy1 + x]; 158e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 159e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int ul2 = rec[-dy2 - x]; 160e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int ur2 = rec[-dy2 + x]; 161e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int ll2 = rec[dy2 - x]; 162e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int lr2 = rec[dy2 + x]; 163e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 164e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com xm += Wxy * (ul1 + ur1 + ll1 + lr1); 165e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com ym += Wxy * (ul2 + ur2 + ll2 + lr2); 166e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com xxm += Wxy * (ul1 * ul1 + ur1 * ur1 + ll1 * ll1 + lr1 * lr1); 167e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com xym += Wxy * (ul1 * ul2 + ur1 * ur2 + ll1 * ll2 + lr1 * lr2); 168e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com yym += Wxy * (ul2 * ul2 + ur2 * ur2 + ll2 * ll2 + lr2 * lr2); 169e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } 170e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 171e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // Compute the contributions of up (u), down (d), left (l) and right (r) 172e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // points across the main axes (see the diagram below). 173e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // Symmetric Kernel will have same weight on those points. 174e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // - - - - - - - 175e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // - - - u - - - 176e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // - - - - - - - 177e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // - l - 0 - r - 178e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // - - - - - - - 179e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // - - - d - - - 180e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // - - - - - - - 181e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int Wxy = Wy * K[KERNEL]; 182e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int u1 = org[-dy1]; 183e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int d1 = org[dy1]; 184e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int l1 = org[-y]; 185e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int r1 = org[y]; 186e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 187e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int u2 = rec[-dy2]; 188e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int d2 = rec[dy2]; 189e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int l2 = rec[-y]; 190e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int r2 = rec[y]; 191e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 192e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com xm += Wxy * (u1 + d1 + l1 + r1); 193e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com ym += Wxy * (u2 + d2 + l2 + r2); 194e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com xxm += Wxy * (u1 * u1 + d1 * d1 + l1 * l1 + r1 * r1); 195e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com xym += Wxy * (u1 * u2 + d1 * d2 + l1 * l2 + r1 * r2); 196e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com yym += Wxy * (u2 * u2 + d2 * d2 + l2 * l2 + r2 * r2); 197e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } 198e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 199e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // Lastly the contribution of (x0, y0) point. 200e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int Wxy = K[KERNEL] * K[KERNEL]; 201e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int s1 = org[0]; 202e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int s2 = rec[0]; 203e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 204e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com xm += Wxy * s1; 205e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com ym += Wxy * s2; 206e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com xxm += Wxy * s1 * s1; 207e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com xym += Wxy * s1 * s2; 208e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com yym += Wxy * s2 * s2; 209e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 210e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#else // __SSE2__ 211e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 212e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com org += (yo - KERNEL) * stride + (xo - KERNEL); 213e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com rec += (yo - KERNEL) * stride + (xo - KERNEL); 214e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 215e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const __m128i zero = _mm_setzero_si128(); 216e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com __m128i x = zero; 217e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com __m128i y = zero; 218e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com __m128i xx = zero; 219e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com __m128i xy = zero; 220e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com __m128i yy = zero; 221e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 222e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com// Read 8 pixels at line #L, and convert to 16bit, perform weighting 223e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com// and acccumulate. 224e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#define LOAD_LINE_PAIR(L, WEIGHT) do { \ 225e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const __m128i v0 = \ 226e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com _mm_loadl_epi64(reinterpret_cast<const __m128i*>(org + (L) * stride)); \ 227e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const __m128i v1 = \ 228e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com _mm_loadl_epi64(reinterpret_cast<const __m128i*>(rec + (L) * stride)); \ 229e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const __m128i w0 = _mm_unpacklo_epi8(v0, zero); \ 230e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const __m128i w1 = _mm_unpacklo_epi8(v1, zero); \ 231e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const __m128i ww0 = _mm_mullo_epi16(w0, (WEIGHT).values_.m_); \ 232e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const __m128i ww1 = _mm_mullo_epi16(w1, (WEIGHT).values_.m_); \ 233e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com x = _mm_add_epi32(x, _mm_unpacklo_epi16(ww0, zero)); \ 234e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com y = _mm_add_epi32(y, _mm_unpacklo_epi16(ww1, zero)); \ 235e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com x = _mm_add_epi32(x, _mm_unpackhi_epi16(ww0, zero)); \ 236e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com y = _mm_add_epi32(y, _mm_unpackhi_epi16(ww1, zero)); \ 237e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com xx = _mm_add_epi32(xx, _mm_madd_epi16(ww0, w0)); \ 238e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com xy = _mm_add_epi32(xy, _mm_madd_epi16(ww0, w1)); \ 239e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com yy = _mm_add_epi32(yy, _mm_madd_epi16(ww1, w1)); \ 240e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com} while (0) 241e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 242e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#define ADD_AND_STORE_FOUR_EPI32(M, OUT) do { \ 243e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com uint32 tmp[4]; \ 244e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com _mm_storeu_si128(reinterpret_cast<__m128i*>(tmp), (M)); \ 245e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com (OUT) = tmp[3] + tmp[2] + tmp[1] + tmp[0]; \ 246e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com} while (0) 247e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 248e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com LOAD_LINE_PAIR(0, W0); 249e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com LOAD_LINE_PAIR(1, W1); 250e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com LOAD_LINE_PAIR(2, W2); 251e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com LOAD_LINE_PAIR(3, W3); 252e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com LOAD_LINE_PAIR(4, W2); 253e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com LOAD_LINE_PAIR(5, W1); 254e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com LOAD_LINE_PAIR(6, W0); 255e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 256e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com ADD_AND_STORE_FOUR_EPI32(x, xm); 257e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com ADD_AND_STORE_FOUR_EPI32(y, ym); 258e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com ADD_AND_STORE_FOUR_EPI32(xx, xxm); 259e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com ADD_AND_STORE_FOUR_EPI32(xy, xym); 260e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com ADD_AND_STORE_FOUR_EPI32(yy, yym); 261e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 262e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#undef LOAD_LINE_PAIR 263e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#undef ADD_AND_STORE_FOUR_EPI32 264e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#endif 265e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 266e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com return FinalizeSSIM(area_weight, xm, ym, xxm, xym, yym); 267e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com} 268e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 269e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.comstatic int start_max(int x, int y) { return (x > y) ? x : y; } 270e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 271e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.comdouble CalcSSIM(const uint8 *org, const uint8 *rec, 272e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int image_width, const int image_height) { 273e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com double SSIM = 0.; 274e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int KERNEL_Y = (image_height < KERNEL) ? image_height : KERNEL; 275e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int KERNEL_X = (image_width < KERNEL) ? image_width : KERNEL; 276e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int start_x = start_max(image_width - 8 + KERNEL_X, KERNEL_X); 277e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int start_y = start_max(image_height - KERNEL_Y, KERNEL_Y); 278e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int stride = image_width; 279e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 280e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com for (int j = 0; j < KERNEL_Y; ++j) { 281e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com for (int i = 0; i < image_width; ++i) { 282e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com SSIM += GetSSIM(org, rec, i, j, image_width, image_height, stride); 283e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } 284e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } 285e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 286e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#ifdef _OPENMP 287e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com #pragma omp parallel for reduction(+: SSIM) 288e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com#endif 289e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com for (int j = KERNEL_Y; j < image_height - KERNEL_Y; ++j) { 290e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com for (int i = 0; i < KERNEL_X; ++i) { 291e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com SSIM += GetSSIM(org, rec, i, j, image_width, image_height, stride); 292e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } 293e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com for (int i = KERNEL_X; i < start_x; ++i) { 294e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com SSIM += GetSSIMFullKernel(org, rec, i, j, stride, kiW[0]); 295e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } 296e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com if (start_x < image_width) { 297e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // GetSSIMFullKernel() needs to be able to read 8 pixels (in SSE2). So we 298e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // copy the 8 rightmost pixels on a cache area, and pad this area with 299e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // zeros which won't contribute to the overall SSIM value (but we need 300e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // to pass the correct normalizing constant!). By using this cache, we can 301e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // still call GetSSIMFullKernel() instead of the slower GetSSIM(). 302e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com // NOTE: we could use similar method for the left-most pixels too. 303e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int kScratchWidth = 8; 304e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int kScratchStride = kScratchWidth + KERNEL + 1; 305e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com uint8 scratch_org[KERNEL_SIZE * kScratchStride] = { 0 }; 306e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com uint8 scratch_rec[KERNEL_SIZE * kScratchStride] = { 0 }; 307e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 308e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com for (int k = 0; k < KERNEL_SIZE; ++k) { 309e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com const int offset = 310e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com (j - KERNEL + k) * stride + image_width - kScratchWidth; 311e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com memcpy(scratch_org + k * kScratchStride, org + offset, kScratchWidth); 312e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com memcpy(scratch_rec + k * kScratchStride, rec + offset, kScratchWidth); 313e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } 314e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com for (int k = 0; k <= KERNEL_X + 1; ++k) { 315e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com SSIM += GetSSIMFullKernel(scratch_org, scratch_rec, 316e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com KERNEL + k, KERNEL, kScratchStride, kiW[k]); 317e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } 318e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } 319e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } 320e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 321e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com for (int j = start_y; j < image_height; ++j) { 322e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com for (int i = 0; i < image_width; ++i) { 323e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com SSIM += GetSSIM(org, rec, i, j, image_width, image_height, stride); 324e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } 325e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com } 326e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com return SSIM; 327e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com} 328e424a9de7a97543aed7789bdec3795567288eafffbarchard@google.com 329b7d674e305f53c83145405a9721a9e822afde004fbarchard@google.comdouble CalcLSSIM(double ssim) { 330b7d674e305f53c83145405a9721a9e822afde004fbarchard@google.com return -10.0 * log10(1.0 - ssim); 331b7d674e305f53c83145405a9721a9e822afde004fbarchard@google.com} 332b7d674e305f53c83145405a9721a9e822afde004fbarchard@google.com 33333d34eaad81ca9b3e05055f5a669c7ec3cd3fe61fbarchard@google.com#ifdef __cplusplus 33433d34eaad81ca9b3e05055f5a669c7ec3cd3fe61fbarchard@google.com} // extern "C" 33533d34eaad81ca9b3e05055f5a669c7ec3cd3fe61fbarchard@google.com#endif 33633d34eaad81ca9b3e05055f5a669c7ec3cd3fe61fbarchard@google.com 337