1/* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "libyuv/compare.h" 12 13#include <float.h> 14#include <math.h> 15#ifdef _OPENMP 16#include <omp.h> 17#endif 18 19#include "libyuv/basic_types.h" 20#include "libyuv/cpu_id.h" 21#include "libyuv/row.h" 22#include "libyuv/video_common.h" 23 24#ifdef __cplusplus 25namespace libyuv { 26extern "C" { 27#endif 28 29// hash seed of 5381 recommended. 30// Internal C version of HashDjb2 with int sized count for efficiency. 31uint32 HashDjb2_C(const uint8* src, int count, uint32 seed); 32 33// This module is for Visual C x86 34#if !defined(LIBYUV_DISABLE_X86) && \ 35 (defined(_M_IX86) || \ 36 (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))) 37#define HAS_HASHDJB2_SSE41 38uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed); 39 40#ifdef VISUALC_HAS_AVX2 41#define HAS_HASHDJB2_AVX2 42uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed); 43#endif 44 45#endif // HAS_HASHDJB2_SSE41 46 47// hash seed of 5381 recommended. 48LIBYUV_API 49uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { 50 const int kBlockSize = 1 << 15; // 32768; 51 int remainder; 52 uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C; 53#if defined(HAS_HASHDJB2_SSE41) 54 if (TestCpuFlag(kCpuHasSSE41)) { 55 HashDjb2_SSE = HashDjb2_SSE41; 56 } 57#endif 58#if defined(HAS_HASHDJB2_AVX2) 59 if (TestCpuFlag(kCpuHasAVX2)) { 60 HashDjb2_SSE = HashDjb2_AVX2; 61 } 62#endif 63 64 while (count >= (uint64)(kBlockSize)) { 65 seed = HashDjb2_SSE(src, kBlockSize, seed); 66 src += kBlockSize; 67 count -= kBlockSize; 68 } 69 remainder = (int)(count) & ~15; 70 if (remainder) { 71 seed = HashDjb2_SSE(src, remainder, seed); 72 src += remainder; 73 count -= remainder; 74 } 75 remainder = (int)(count) & 15; 76 if (remainder) { 77 seed = HashDjb2_C(src, remainder, seed); 78 } 79 return seed; 80} 81 82static uint32 ARGBDetectRow_C(const uint8* argb, int width) { 83 int x; 84 for (x = 0; x < width - 1; x += 2) { 85 if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB. 86 return FOURCC_BGRA; 87 } 88 if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA. 89 return FOURCC_ARGB; 90 } 91 if (argb[4] != 255) { // Second pixel first byte is not Alpha of 255. 92 return FOURCC_BGRA; 93 } 94 if (argb[7] != 255) { // Second pixel 4th byte is not Alpha of 255. 95 return FOURCC_ARGB; 96 } 97 argb += 8; 98 } 99 if (width & 1) { 100 if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB. 101 return FOURCC_BGRA; 102 } 103 if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA. 104 return FOURCC_ARGB; 105 } 106 } 107 return 0; 108} 109 110// Scan an opaque argb image and return fourcc based on alpha offset. 111// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown. 112LIBYUV_API 113uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) { 114 uint32 fourcc = 0; 115 int h; 116 117 // Coalesce rows. 118 if (stride_argb == width * 4) { 119 width *= height; 120 height = 1; 121 stride_argb = 0; 122 } 123 for (h = 0; h < height && fourcc == 0; ++h) { 124 fourcc = ARGBDetectRow_C(argb, width); 125 argb += stride_argb; 126 } 127 return fourcc; 128} 129 130uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count); 131#if !defined(LIBYUV_DISABLE_NEON) && \ 132 (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) 133#define HAS_SUMSQUAREERROR_NEON 134uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count); 135#endif 136#if !defined(LIBYUV_DISABLE_X86) && \ 137 (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) 138#define HAS_SUMSQUAREERROR_SSE2 139uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count); 140#endif 141 142#ifdef VISUALC_HAS_AVX2 143#define HAS_SUMSQUAREERROR_AVX2 144uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count); 145#endif 146 147// TODO(fbarchard): Refactor into row function. 148LIBYUV_API 149uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, 150 int count) { 151 // SumSquareError returns values 0 to 65535 for each squared difference. 152 // Up to 65536 of those can be summed and remain within a uint32. 153 // After each block of 65536 pixels, accumulate into a uint64. 154 const int kBlockSize = 65536; 155 int remainder = count & (kBlockSize - 1) & ~31; 156 uint64 sse = 0; 157 int i; 158 uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) = 159 SumSquareError_C; 160#if defined(HAS_SUMSQUAREERROR_NEON) 161 if (TestCpuFlag(kCpuHasNEON)) { 162 SumSquareError = SumSquareError_NEON; 163 } 164#endif 165#if defined(HAS_SUMSQUAREERROR_SSE2) 166 if (TestCpuFlag(kCpuHasSSE2)) { 167 // Note only used for multiples of 16 so count is not checked. 168 SumSquareError = SumSquareError_SSE2; 169 } 170#endif 171#if defined(HAS_SUMSQUAREERROR_AVX2) 172 if (TestCpuFlag(kCpuHasAVX2)) { 173 // Note only used for multiples of 32 so count is not checked. 174 SumSquareError = SumSquareError_AVX2; 175 } 176#endif 177#ifdef _OPENMP 178#pragma omp parallel for reduction(+: sse) 179#endif 180 for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { 181 sse += SumSquareError(src_a + i, src_b + i, kBlockSize); 182 } 183 src_a += count & ~(kBlockSize - 1); 184 src_b += count & ~(kBlockSize - 1); 185 if (remainder) { 186 sse += SumSquareError(src_a, src_b, remainder); 187 src_a += remainder; 188 src_b += remainder; 189 } 190 remainder = count & 31; 191 if (remainder) { 192 sse += SumSquareError_C(src_a, src_b, remainder); 193 } 194 return sse; 195} 196 197LIBYUV_API 198uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, 199 const uint8* src_b, int stride_b, 200 int width, int height) { 201 uint64 sse = 0; 202 int h; 203 // Coalesce rows. 204 if (stride_a == width && 205 stride_b == width) { 206 width *= height; 207 height = 1; 208 stride_a = stride_b = 0; 209 } 210 for (h = 0; h < height; ++h) { 211 sse += ComputeSumSquareError(src_a, src_b, width); 212 src_a += stride_a; 213 src_b += stride_b; 214 } 215 return sse; 216} 217 218LIBYUV_API 219double SumSquareErrorToPsnr(uint64 sse, uint64 count) { 220 double psnr; 221 if (sse > 0) { 222 double mse = (double)(count) / (double)(sse); 223 psnr = 10.0 * log10(255.0 * 255.0 * mse); 224 } else { 225 psnr = kMaxPsnr; // Limit to prevent divide by 0 226 } 227 228 if (psnr > kMaxPsnr) 229 psnr = kMaxPsnr; 230 231 return psnr; 232} 233 234LIBYUV_API 235double CalcFramePsnr(const uint8* src_a, int stride_a, 236 const uint8* src_b, int stride_b, 237 int width, int height) { 238 const uint64 samples = width * height; 239 const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a, 240 src_b, stride_b, 241 width, height); 242 return SumSquareErrorToPsnr(sse, samples); 243} 244 245LIBYUV_API 246double I420Psnr(const uint8* src_y_a, int stride_y_a, 247 const uint8* src_u_a, int stride_u_a, 248 const uint8* src_v_a, int stride_v_a, 249 const uint8* src_y_b, int stride_y_b, 250 const uint8* src_u_b, int stride_u_b, 251 const uint8* src_v_b, int stride_v_b, 252 int width, int height) { 253 const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a, 254 src_y_b, stride_y_b, 255 width, height); 256 const int width_uv = (width + 1) >> 1; 257 const int height_uv = (height + 1) >> 1; 258 const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a, 259 src_u_b, stride_u_b, 260 width_uv, height_uv); 261 const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a, 262 src_v_b, stride_v_b, 263 width_uv, height_uv); 264 const uint64 samples = width * height + 2 * (width_uv * height_uv); 265 const uint64 sse = sse_y + sse_u + sse_v; 266 return SumSquareErrorToPsnr(sse, samples); 267} 268 269static const int64 cc1 = 26634; // (64^2*(.01*255)^2 270static const int64 cc2 = 239708; // (64^2*(.03*255)^2 271 272static double Ssim8x8_C(const uint8* src_a, int stride_a, 273 const uint8* src_b, int stride_b) { 274 int64 sum_a = 0; 275 int64 sum_b = 0; 276 int64 sum_sq_a = 0; 277 int64 sum_sq_b = 0; 278 int64 sum_axb = 0; 279 280 int i; 281 for (i = 0; i < 8; ++i) { 282 int j; 283 for (j = 0; j < 8; ++j) { 284 sum_a += src_a[j]; 285 sum_b += src_b[j]; 286 sum_sq_a += src_a[j] * src_a[j]; 287 sum_sq_b += src_b[j] * src_b[j]; 288 sum_axb += src_a[j] * src_b[j]; 289 } 290 291 src_a += stride_a; 292 src_b += stride_b; 293 } 294 295 { 296 const int64 count = 64; 297 // scale the constants by number of pixels 298 const int64 c1 = (cc1 * count * count) >> 12; 299 const int64 c2 = (cc2 * count * count) >> 12; 300 301 const int64 sum_a_x_sum_b = sum_a * sum_b; 302 303 const int64 ssim_n = (2 * sum_a_x_sum_b + c1) * 304 (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2); 305 306 const int64 sum_a_sq = sum_a*sum_a; 307 const int64 sum_b_sq = sum_b*sum_b; 308 309 const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) * 310 (count * sum_sq_a - sum_a_sq + 311 count * sum_sq_b - sum_b_sq + c2); 312 313 if (ssim_d == 0.0) { 314 return DBL_MAX; 315 } 316 return ssim_n * 1.0 / ssim_d; 317 } 318} 319 320// We are using a 8x8 moving window with starting location of each 8x8 window 321// on the 4x4 pixel grid. Such arrangement allows the windows to overlap 322// block boundaries to penalize blocking artifacts. 323LIBYUV_API 324double CalcFrameSsim(const uint8* src_a, int stride_a, 325 const uint8* src_b, int stride_b, 326 int width, int height) { 327 int samples = 0; 328 double ssim_total = 0; 329 double (*Ssim8x8)(const uint8* src_a, int stride_a, 330 const uint8* src_b, int stride_b) = Ssim8x8_C; 331 332 // sample point start with each 4x4 location 333 int i; 334 for (i = 0; i < height - 8; i += 4) { 335 int j; 336 for (j = 0; j < width - 8; j += 4) { 337 ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b); 338 samples++; 339 } 340 341 src_a += stride_a * 4; 342 src_b += stride_b * 4; 343 } 344 345 ssim_total /= samples; 346 return ssim_total; 347} 348 349LIBYUV_API 350double I420Ssim(const uint8* src_y_a, int stride_y_a, 351 const uint8* src_u_a, int stride_u_a, 352 const uint8* src_v_a, int stride_v_a, 353 const uint8* src_y_b, int stride_y_b, 354 const uint8* src_u_b, int stride_u_b, 355 const uint8* src_v_b, int stride_v_b, 356 int width, int height) { 357 const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a, 358 src_y_b, stride_y_b, width, height); 359 const int width_uv = (width + 1) >> 1; 360 const int height_uv = (height + 1) >> 1; 361 const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a, 362 src_u_b, stride_u_b, 363 width_uv, height_uv); 364 const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a, 365 src_v_b, stride_v_b, 366 width_uv, height_uv); 367 return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v); 368} 369 370#ifdef __cplusplus 371} // extern "C" 372} // namespace libyuv 373#endif 374