141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org/*
241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org *
441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org *  Use of this source code is governed by a BSD-style license
541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org *  that can be found in the LICENSE file in the root of the source
641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org *  tree. An additional intellectual property rights grant can be found
741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org *  in the file PATENTS. All contributing project authors may
841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org *  be found in the AUTHORS file in the root of the source tree.
941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org */
1041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
1141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#include "libyuv/compare.h"
1241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
1341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#include <float.h>
1441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#include <math.h>
1541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef _OPENMP
1641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#include <omp.h>
1741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif
1841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
1941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#include "libyuv/basic_types.h"
2041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#include "libyuv/cpu_id.h"
2141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#include "libyuv/row.h"
2241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
2341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef __cplusplus
2441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgnamespace libyuv {
2541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgextern "C" {
2641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif
2741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
2841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// hash seed of 5381 recommended.
2941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Internal C version of HashDjb2 with int sized count for efficiency.
3041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orguint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
3141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
3241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// This module is for Visual C x86
3341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#if !defined(LIBYUV_DISABLE_X86) && \
3441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    (defined(_M_IX86) || \
3541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
3641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define HAS_HASHDJB2_SSE41
3741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orguint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
3841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
3941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#if _MSC_VER >= 1700
4041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define HAS_HASHDJB2_AVX2
4141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orguint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
4241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif
4341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
4441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_HASHDJB2_SSE41
4541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
4641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// hash seed of 5381 recommended.
4741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgLIBYUV_API
4841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orguint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
4941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const int kBlockSize = 1 << 15;  // 32768;
5041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  int remainder;
5141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
5241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#if defined(HAS_HASHDJB2_SSE41)
5341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  if (TestCpuFlag(kCpuHasSSE41)) {
5441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    HashDjb2_SSE = HashDjb2_SSE41;
5541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
5641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif
5741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#if defined(HAS_HASHDJB2_AVX2)
5841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  if (TestCpuFlag(kCpuHasAVX2)) {
5941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    HashDjb2_SSE = HashDjb2_AVX2;
6041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
6141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif
6241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
6341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  while (count >= (uint64)(kBlockSize)) {
6441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    seed = HashDjb2_SSE(src, kBlockSize, seed);
6541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    src += kBlockSize;
6641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    count -= kBlockSize;
6741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
6841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  remainder = (int)(count) & ~15;
6941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  if (remainder) {
7041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    seed = HashDjb2_SSE(src, remainder, seed);
7141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    src += remainder;
7241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    count -= remainder;
7341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
7441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  remainder = (int)(count) & 15;
7541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  if (remainder) {
7641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    seed = HashDjb2_C(src, remainder, seed);
7741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
7841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  return seed;
7941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
8041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
8141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orguint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
8241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#if !defined(LIBYUV_DISABLE_NEON) && \
83d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
8441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define HAS_SUMSQUAREERROR_NEON
8541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orguint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
8641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif
8741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#if !defined(LIBYUV_DISABLE_X86) && \
8841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
8941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define HAS_SUMSQUAREERROR_SSE2
9041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orguint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
9141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif
9241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Visual C 2012 required for AVX2.
9341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700
9441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define HAS_SUMSQUAREERROR_AVX2
9541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orguint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
9641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif
9741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
9841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// TODO(fbarchard): Refactor into row function.
9941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgLIBYUV_API
10041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orguint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
10141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                             int count) {
10241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  // SumSquareError returns values 0 to 65535 for each squared difference.
10341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  // Up to 65536 of those can be summed and remain within a uint32.
10441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  // After each block of 65536 pixels, accumulate into a uint64.
10541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const int kBlockSize = 65536;
10641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  int remainder = count & (kBlockSize - 1) & ~31;
10741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  uint64 sse = 0;
10841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  int i;
10941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
11041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      SumSquareError_C;
11141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#if defined(HAS_SUMSQUAREERROR_NEON)
11241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  if (TestCpuFlag(kCpuHasNEON)) {
11341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    SumSquareError = SumSquareError_NEON;
11441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
11541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif
11641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#if defined(HAS_SUMSQUAREERROR_SSE2)
11741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  if (TestCpuFlag(kCpuHasSSE2) &&
11841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
11941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    // Note only used for multiples of 16 so count is not checked.
12041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    SumSquareError = SumSquareError_SSE2;
12141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
12241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif
12341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#if defined(HAS_SUMSQUAREERROR_AVX2)
12441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  if (TestCpuFlag(kCpuHasAVX2)) {
12541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    // Note only used for multiples of 32 so count is not checked.
12641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    SumSquareError = SumSquareError_AVX2;
12741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
12841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif
12941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef _OPENMP
13041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#pragma omp parallel for reduction(+: sse)
13141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif
13241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
13341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
13441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
13541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  src_a += count & ~(kBlockSize - 1);
13641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  src_b += count & ~(kBlockSize - 1);
13741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  if (remainder) {
13841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    sse += SumSquareError(src_a, src_b, remainder);
13941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    src_a += remainder;
14041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    src_b += remainder;
14141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
14241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  remainder = count & 31;
14341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  if (remainder) {
14441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    sse += SumSquareError_C(src_a, src_b, remainder);
14541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
14641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  return sse;
14741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
14841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
14941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgLIBYUV_API
15041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orguint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
15141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                                  const uint8* src_b, int stride_b,
15241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                                  int width, int height) {
15341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  uint64 sse = 0;
15441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  int h;
15541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  // Coalesce rows.
15641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  if (stride_a == width &&
15741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      stride_b == width) {
15841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    width *= height;
15941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    height = 1;
16041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    stride_a = stride_b = 0;
16141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
16241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  for (h = 0; h < height; ++h) {
16341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    sse += ComputeSumSquareError(src_a, src_b, width);
16441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    src_a += stride_a;
16541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    src_b += stride_b;
16641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
16741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  return sse;
16841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
16941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
17041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgLIBYUV_API
17141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgdouble SumSquareErrorToPsnr(uint64 sse, uint64 count) {
17241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  double psnr;
17341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  if (sse > 0) {
17441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    double mse = (double)(count) / (double)(sse);
17541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    psnr = 10.0 * log10(255.0 * 255.0 * mse);
17641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  } else {
17741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    psnr = kMaxPsnr;      // Limit to prevent divide by 0
17841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
17941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
18041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  if (psnr > kMaxPsnr)
18141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    psnr = kMaxPsnr;
18241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
18341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  return psnr;
18441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
18541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
18641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgLIBYUV_API
18741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgdouble CalcFramePsnr(const uint8* src_a, int stride_a,
18841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                     const uint8* src_b, int stride_b,
18941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                     int width, int height) {
19041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const uint64 samples = width * height;
19141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
19241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                                                src_b, stride_b,
19341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                                                width, height);
19441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  return SumSquareErrorToPsnr(sse, samples);
19541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
19641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
19741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgLIBYUV_API
19841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgdouble I420Psnr(const uint8* src_y_a, int stride_y_a,
19941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                const uint8* src_u_a, int stride_u_a,
20041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                const uint8* src_v_a, int stride_v_a,
20141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                const uint8* src_y_b, int stride_y_b,
20241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                const uint8* src_u_b, int stride_u_b,
20341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                const uint8* src_v_b, int stride_v_b,
20441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                int width, int height) {
20541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
20641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                                                  src_y_b, stride_y_b,
20741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                                                  width, height);
20841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const int width_uv = (width + 1) >> 1;
20941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const int height_uv = (height + 1) >> 1;
21041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
21141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                                                  src_u_b, stride_u_b,
21241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                                                  width_uv, height_uv);
21341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
21441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                                                  src_v_b, stride_v_b,
21541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                                                  width_uv, height_uv);
21641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const uint64 samples = width * height + 2 * (width_uv * height_uv);
21741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const uint64 sse = sse_y + sse_u + sse_v;
21841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  return SumSquareErrorToPsnr(sse, samples);
21941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
22041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
22141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgstatic const int64 cc1 =  26634;  // (64^2*(.01*255)^2
22241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgstatic const int64 cc2 = 239708;  // (64^2*(.03*255)^2
22341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
22441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgstatic double Ssim8x8_C(const uint8* src_a, int stride_a,
22541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                        const uint8* src_b, int stride_b) {
22641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  int64 sum_a = 0;
22741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  int64 sum_b = 0;
22841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  int64 sum_sq_a = 0;
22941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  int64 sum_sq_b = 0;
23041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  int64 sum_axb = 0;
23141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
23241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  int i;
23341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  for (i = 0; i < 8; ++i) {
23441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    int j;
23541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    for (j = 0; j < 8; ++j) {
23641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      sum_a += src_a[j];
23741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      sum_b += src_b[j];
23841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      sum_sq_a += src_a[j] * src_a[j];
23941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      sum_sq_b += src_b[j] * src_b[j];
24041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      sum_axb += src_a[j] * src_b[j];
24141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    }
24241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
24341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    src_a += stride_a;
24441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    src_b += stride_b;
24541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
24641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
24741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  {
24841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    const int64 count = 64;
24941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    // scale the constants by number of pixels
25041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    const int64 c1 = (cc1 * count * count) >> 12;
25141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    const int64 c2 = (cc2 * count * count) >> 12;
25241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
25341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    const int64 sum_a_x_sum_b = sum_a * sum_b;
25441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
25541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
25641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                         (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
25741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
25841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    const int64 sum_a_sq = sum_a*sum_a;
25941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    const int64 sum_b_sq = sum_b*sum_b;
26041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
26141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) *
26241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                         (count * sum_sq_a - sum_a_sq +
26341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                          count * sum_sq_b - sum_b_sq + c2);
26441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
26541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    if (ssim_d == 0.0) {
26641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      return DBL_MAX;
26741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    }
26841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    return ssim_n * 1.0 / ssim_d;
26941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
27041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
27141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
27241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// We are using a 8x8 moving window with starting location of each 8x8 window
27341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
27441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// block boundaries to penalize blocking artifacts.
27541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgLIBYUV_API
27641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgdouble CalcFrameSsim(const uint8* src_a, int stride_a,
27741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                     const uint8* src_b, int stride_b,
27841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                     int width, int height) {
27941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  int samples = 0;
28041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  double ssim_total = 0;
28141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  double (*Ssim8x8)(const uint8* src_a, int stride_a,
28241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                    const uint8* src_b, int stride_b) = Ssim8x8_C;
28341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
28441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  // sample point start with each 4x4 location
28541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  int i;
28641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  for (i = 0; i < height - 8; i += 4) {
28741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    int j;
28841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    for (j = 0; j < width - 8; j += 4) {
28941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b);
29041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      samples++;
29141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    }
29241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
29341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    src_a += stride_a * 4;
29441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    src_b += stride_b * 4;
29541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
29641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
29741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  ssim_total /= samples;
29841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  return ssim_total;
29941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
30041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
30141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgLIBYUV_API
30241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgdouble I420Ssim(const uint8* src_y_a, int stride_y_a,
30341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                const uint8* src_u_a, int stride_u_a,
30441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                const uint8* src_v_a, int stride_v_a,
30541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                const uint8* src_y_b, int stride_y_b,
30641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                const uint8* src_u_b, int stride_u_b,
30741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                const uint8* src_v_b, int stride_v_b,
30841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                int width, int height) {
30941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a,
31041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                                      src_y_b, stride_y_b, width, height);
31141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const int width_uv = (width + 1) >> 1;
31241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const int height_uv = (height + 1) >> 1;
31341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a,
31441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                                      src_u_b, stride_u_b,
31541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                                      width_uv, height_uv);
31641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a,
31741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                                      src_v_b, stride_v_b,
31841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                                      width_uv, height_uv);
31941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
32041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
32141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
32241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef __cplusplus
32341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}  // extern "C"
32441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}  // namespace libyuv
32541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif
326