fastssim.c revision 7ce0a1d1337c01056ba24006efab21f00e179e04
1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 * 10 * This code was originally written by: Nathan E. Egge, at the Daala 11 * project. 12 */ 13#include <math.h> 14#include <stdlib.h> 15#include <string.h> 16#include "./vpx_config.h" 17#include "./vpx_dsp_rtcd.h" 18#include "vpx_dsp/ssim.h" 19#include "vpx_ports/system_state.h" 20/* TODO(jbb): High bit depth version of this code needed */ 21typedef struct fs_level fs_level; 22typedef struct fs_ctx fs_ctx; 23 24#define SSIM_C1 (255 * 255 * 0.01 * 0.01) 25#define SSIM_C2 (255 * 255 * 0.03 * 0.03) 26 27#define FS_MINI(_a, _b) ((_a) < (_b) ? (_a) : (_b)) 28#define FS_MAXI(_a, _b) ((_a) > (_b) ? (_a) : (_b)) 29 30struct fs_level { 31 uint16_t *im1; 32 uint16_t *im2; 33 double *ssim; 34 int w; 35 int h; 36}; 37 38struct fs_ctx { 39 fs_level *level; 40 int nlevels; 41 unsigned *col_buf; 42}; 43 44static void fs_ctx_init(fs_ctx *_ctx, int _w, int _h, int _nlevels) { 45 unsigned char *data; 46 size_t data_size; 47 int lw; 48 int lh; 49 int l; 50 lw = (_w + 1) >> 1; 51 lh = (_h + 1) >> 1; 52 data_size = _nlevels * sizeof(fs_level) 53 + 2 * (lw + 8) * 8 * sizeof(*_ctx->col_buf); 54 for (l = 0; l < _nlevels; l++) { 55 size_t im_size; 56 size_t level_size; 57 im_size = lw * (size_t) lh; 58 level_size = 2 * im_size * sizeof(*_ctx->level[l].im1); 59 level_size += sizeof(*_ctx->level[l].ssim) - 1; 60 level_size /= sizeof(*_ctx->level[l].ssim); 61 level_size += im_size; 62 level_size *= sizeof(*_ctx->level[l].ssim); 63 data_size += level_size; 64 lw = (lw + 1) >> 1; 65 lh = (lh + 1) >> 1; 66 } 67 data = (unsigned char *) malloc(data_size); 68 _ctx->level = (fs_level *) data; 69 _ctx->nlevels = _nlevels; 70 data += _nlevels * sizeof(*_ctx->level); 71 lw = (_w + 1) >> 1; 72 lh = (_h + 1) >> 1; 73 for (l = 0; l < _nlevels; l++) { 74 size_t im_size; 75 size_t level_size; 76 _ctx->level[l].w = lw; 77 _ctx->level[l].h = lh; 78 im_size = lw * (size_t) lh; 79 level_size = 2 * im_size * sizeof(*_ctx->level[l].im1); 80 level_size += sizeof(*_ctx->level[l].ssim) - 1; 81 level_size /= sizeof(*_ctx->level[l].ssim); 82 level_size *= sizeof(*_ctx->level[l].ssim); 83 _ctx->level[l].im1 = (uint16_t *) data; 84 _ctx->level[l].im2 = _ctx->level[l].im1 + im_size; 85 data += level_size; 86 _ctx->level[l].ssim = (double *) data; 87 data += im_size * sizeof(*_ctx->level[l].ssim); 88 lw = (lw + 1) >> 1; 89 lh = (lh + 1) >> 1; 90 } 91 _ctx->col_buf = (unsigned *) data; 92} 93 94static void fs_ctx_clear(fs_ctx *_ctx) { 95 free(_ctx->level); 96} 97 98static void fs_downsample_level(fs_ctx *_ctx, int _l) { 99 const uint16_t *src1; 100 const uint16_t *src2; 101 uint16_t *dst1; 102 uint16_t *dst2; 103 int w2; 104 int h2; 105 int w; 106 int h; 107 int i; 108 int j; 109 w = _ctx->level[_l].w; 110 h = _ctx->level[_l].h; 111 dst1 = _ctx->level[_l].im1; 112 dst2 = _ctx->level[_l].im2; 113 w2 = _ctx->level[_l - 1].w; 114 h2 = _ctx->level[_l - 1].h; 115 src1 = _ctx->level[_l - 1].im1; 116 src2 = _ctx->level[_l - 1].im2; 117 for (j = 0; j < h; j++) { 118 int j0offs; 119 int j1offs; 120 j0offs = 2 * j * w2; 121 j1offs = FS_MINI(2 * j + 1, h2) * w2; 122 for (i = 0; i < w; i++) { 123 int i0; 124 int i1; 125 i0 = 2 * i; 126 i1 = FS_MINI(i0 + 1, w2); 127 dst1[j * w + i] = src1[j0offs + i0] + src1[j0offs + i1] 128 + src1[j1offs + i0] + src1[j1offs + i1]; 129 dst2[j * w + i] = src2[j0offs + i0] + src2[j0offs + i1] 130 + src2[j1offs + i0] + src2[j1offs + i1]; 131 } 132 } 133} 134 135static void fs_downsample_level0(fs_ctx *_ctx, const unsigned char *_src1, 136 int _s1ystride, const unsigned char *_src2, 137 int _s2ystride, int _w, int _h) { 138 uint16_t *dst1; 139 uint16_t *dst2; 140 int w; 141 int h; 142 int i; 143 int j; 144 w = _ctx->level[0].w; 145 h = _ctx->level[0].h; 146 dst1 = _ctx->level[0].im1; 147 dst2 = _ctx->level[0].im2; 148 for (j = 0; j < h; j++) { 149 int j0; 150 int j1; 151 j0 = 2 * j; 152 j1 = FS_MINI(j0 + 1, _h); 153 for (i = 0; i < w; i++) { 154 int i0; 155 int i1; 156 i0 = 2 * i; 157 i1 = FS_MINI(i0 + 1, _w); 158 dst1[j * w + i] = _src1[j0 * _s1ystride + i0] 159 + _src1[j0 * _s1ystride + i1] + _src1[j1 * _s1ystride + i0] 160 + _src1[j1 * _s1ystride + i1]; 161 dst2[j * w + i] = _src2[j0 * _s2ystride + i0] 162 + _src2[j0 * _s2ystride + i1] + _src2[j1 * _s2ystride + i0] 163 + _src2[j1 * _s2ystride + i1]; 164 } 165 } 166} 167 168static void fs_apply_luminance(fs_ctx *_ctx, int _l) { 169 unsigned *col_sums_x; 170 unsigned *col_sums_y; 171 uint16_t *im1; 172 uint16_t *im2; 173 double *ssim; 174 double c1; 175 int w; 176 int h; 177 int j0offs; 178 int j1offs; 179 int i; 180 int j; 181 w = _ctx->level[_l].w; 182 h = _ctx->level[_l].h; 183 col_sums_x = _ctx->col_buf; 184 col_sums_y = col_sums_x + w; 185 im1 = _ctx->level[_l].im1; 186 im2 = _ctx->level[_l].im2; 187 for (i = 0; i < w; i++) 188 col_sums_x[i] = 5 * im1[i]; 189 for (i = 0; i < w; i++) 190 col_sums_y[i] = 5 * im2[i]; 191 for (j = 1; j < 4; j++) { 192 j1offs = FS_MINI(j, h - 1) * w; 193 for (i = 0; i < w; i++) 194 col_sums_x[i] += im1[j1offs + i]; 195 for (i = 0; i < w; i++) 196 col_sums_y[i] += im2[j1offs + i]; 197 } 198 ssim = _ctx->level[_l].ssim; 199 c1 = (double) (SSIM_C1 * 4096 * (1 << 4 * _l)); 200 for (j = 0; j < h; j++) { 201 unsigned mux; 202 unsigned muy; 203 int i0; 204 int i1; 205 mux = 5 * col_sums_x[0]; 206 muy = 5 * col_sums_y[0]; 207 for (i = 1; i < 4; i++) { 208 i1 = FS_MINI(i, w - 1); 209 mux += col_sums_x[i1]; 210 muy += col_sums_y[i1]; 211 } 212 for (i = 0; i < w; i++) { 213 ssim[j * w + i] *= (2 * mux * (double) muy + c1) 214 / (mux * (double) mux + muy * (double) muy + c1); 215 if (i + 1 < w) { 216 i0 = FS_MAXI(0, i - 4); 217 i1 = FS_MINI(i + 4, w - 1); 218 mux += col_sums_x[i1] - col_sums_x[i0]; 219 muy += col_sums_x[i1] - col_sums_x[i0]; 220 } 221 } 222 if (j + 1 < h) { 223 j0offs = FS_MAXI(0, j - 4) * w; 224 for (i = 0; i < w; i++) 225 col_sums_x[i] -= im1[j0offs + i]; 226 for (i = 0; i < w; i++) 227 col_sums_y[i] -= im2[j0offs + i]; 228 j1offs = FS_MINI(j + 4, h - 1) * w; 229 for (i = 0; i < w; i++) 230 col_sums_x[i] += im1[j1offs + i]; 231 for (i = 0; i < w; i++) 232 col_sums_y[i] += im2[j1offs + i]; 233 } 234 } 235} 236 237#define FS_COL_SET(_col, _joffs, _ioffs) \ 238 do { \ 239 unsigned gx; \ 240 unsigned gy; \ 241 gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ 242 gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ 243 col_sums_gx2[(_col)] = gx * (double)gx; \ 244 col_sums_gy2[(_col)] = gy * (double)gy; \ 245 col_sums_gxgy[(_col)] = gx * (double)gy; \ 246 } \ 247 while (0) 248 249#define FS_COL_ADD(_col, _joffs, _ioffs) \ 250 do { \ 251 unsigned gx; \ 252 unsigned gy; \ 253 gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ 254 gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ 255 col_sums_gx2[(_col)] += gx * (double)gx; \ 256 col_sums_gy2[(_col)] += gy * (double)gy; \ 257 col_sums_gxgy[(_col)] += gx * (double)gy; \ 258 } \ 259 while (0) 260 261#define FS_COL_SUB(_col, _joffs, _ioffs) \ 262 do { \ 263 unsigned gx; \ 264 unsigned gy; \ 265 gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ 266 gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ 267 col_sums_gx2[(_col)] -= gx * (double)gx; \ 268 col_sums_gy2[(_col)] -= gy * (double)gy; \ 269 col_sums_gxgy[(_col)] -= gx * (double)gy; \ 270 } \ 271 while (0) 272 273#define FS_COL_COPY(_col1, _col2) \ 274 do { \ 275 col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)]; \ 276 col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)]; \ 277 col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)]; \ 278 } \ 279 while (0) 280 281#define FS_COL_HALVE(_col1, _col2) \ 282 do { \ 283 col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)] * 0.5; \ 284 col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)] * 0.5; \ 285 col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)] * 0.5; \ 286 } \ 287 while (0) 288 289#define FS_COL_DOUBLE(_col1, _col2) \ 290 do { \ 291 col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)] * 2; \ 292 col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)] * 2; \ 293 col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)] * 2; \ 294 } \ 295 while (0) 296 297static void fs_calc_structure(fs_ctx *_ctx, int _l) { 298 uint16_t *im1; 299 uint16_t *im2; 300 unsigned *gx_buf; 301 unsigned *gy_buf; 302 double *ssim; 303 double col_sums_gx2[8]; 304 double col_sums_gy2[8]; 305 double col_sums_gxgy[8]; 306 double c2; 307 int stride; 308 int w; 309 int h; 310 int i; 311 int j; 312 w = _ctx->level[_l].w; 313 h = _ctx->level[_l].h; 314 im1 = _ctx->level[_l].im1; 315 im2 = _ctx->level[_l].im2; 316 ssim = _ctx->level[_l].ssim; 317 gx_buf = _ctx->col_buf; 318 stride = w + 8; 319 gy_buf = gx_buf + 8 * stride; 320 memset(gx_buf, 0, 2 * 8 * stride * sizeof(*gx_buf)); 321 c2 = SSIM_C2 * (1 << 4 * _l) * 16 * 104; 322 for (j = 0; j < h + 4; j++) { 323 if (j < h - 1) { 324 for (i = 0; i < w - 1; i++) { 325 unsigned g1; 326 unsigned g2; 327 unsigned gx; 328 unsigned gy; 329 g1 = abs(im1[(j + 1) * w + i + 1] - im1[j * w + i]); 330 g2 = abs(im1[(j + 1) * w + i] - im1[j * w + i + 1]); 331 gx = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2); 332 g1 = abs(im2[(j + 1) * w + i + 1] - im2[j * w + i]); 333 g2 = abs(im2[(j + 1) * w + i] - im2[j * w + i + 1]); 334 gy = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2); 335 gx_buf[(j & 7) * stride + i + 4] = gx; 336 gy_buf[(j & 7) * stride + i + 4] = gy; 337 } 338 } else { 339 memset(gx_buf + (j & 7) * stride, 0, stride * sizeof(*gx_buf)); 340 memset(gy_buf + (j & 7) * stride, 0, stride * sizeof(*gy_buf)); 341 } 342 if (j >= 4) { 343 int k; 344 col_sums_gx2[3] = col_sums_gx2[2] = col_sums_gx2[1] = col_sums_gx2[0] = 0; 345 col_sums_gy2[3] = col_sums_gy2[2] = col_sums_gy2[1] = col_sums_gy2[0] = 0; 346 col_sums_gxgy[3] = col_sums_gxgy[2] = col_sums_gxgy[1] = 347 col_sums_gxgy[0] = 0; 348 for (i = 4; i < 8; i++) { 349 FS_COL_SET(i, -1, 0); 350 FS_COL_ADD(i, 0, 0); 351 for (k = 1; k < 8 - i; k++) { 352 FS_COL_DOUBLE(i, i); 353 FS_COL_ADD(i, -k - 1, 0); 354 FS_COL_ADD(i, k, 0); 355 } 356 } 357 for (i = 0; i < w; i++) { 358 double mugx2; 359 double mugy2; 360 double mugxgy; 361 mugx2 = col_sums_gx2[0]; 362 for (k = 1; k < 8; k++) 363 mugx2 += col_sums_gx2[k]; 364 mugy2 = col_sums_gy2[0]; 365 for (k = 1; k < 8; k++) 366 mugy2 += col_sums_gy2[k]; 367 mugxgy = col_sums_gxgy[0]; 368 for (k = 1; k < 8; k++) 369 mugxgy += col_sums_gxgy[k]; 370 ssim[(j - 4) * w + i] = (2 * mugxgy + c2) / (mugx2 + mugy2 + c2); 371 if (i + 1 < w) { 372 FS_COL_SET(0, -1, 1); 373 FS_COL_ADD(0, 0, 1); 374 FS_COL_SUB(2, -3, 2); 375 FS_COL_SUB(2, 2, 2); 376 FS_COL_HALVE(1, 2); 377 FS_COL_SUB(3, -4, 3); 378 FS_COL_SUB(3, 3, 3); 379 FS_COL_HALVE(2, 3); 380 FS_COL_COPY(3, 4); 381 FS_COL_DOUBLE(4, 5); 382 FS_COL_ADD(4, -4, 5); 383 FS_COL_ADD(4, 3, 5); 384 FS_COL_DOUBLE(5, 6); 385 FS_COL_ADD(5, -3, 6); 386 FS_COL_ADD(5, 2, 6); 387 FS_COL_DOUBLE(6, 7); 388 FS_COL_ADD(6, -2, 7); 389 FS_COL_ADD(6, 1, 7); 390 FS_COL_SET(7, -1, 8); 391 FS_COL_ADD(7, 0, 8); 392 } 393 } 394 } 395 } 396} 397 398#define FS_NLEVELS (4) 399 400/*These weights were derived from the default weights found in Wang's original 401 Matlab implementation: {0.0448, 0.2856, 0.2363, 0.1333}. 402 We drop the finest scale and renormalize the rest to sum to 1.*/ 403 404static const double FS_WEIGHTS[FS_NLEVELS] = {0.2989654541015625, 405 0.3141326904296875, 0.2473602294921875, 0.1395416259765625}; 406 407static double fs_average(fs_ctx *_ctx, int _l) { 408 double *ssim; 409 double ret; 410 int w; 411 int h; 412 int i; 413 int j; 414 w = _ctx->level[_l].w; 415 h = _ctx->level[_l].h; 416 ssim = _ctx->level[_l].ssim; 417 ret = 0; 418 for (j = 0; j < h; j++) 419 for (i = 0; i < w; i++) 420 ret += ssim[j * w + i]; 421 return pow(ret / (w * h), FS_WEIGHTS[_l]); 422} 423 424static double calc_ssim(const unsigned char *_src, int _systride, 425 const unsigned char *_dst, int _dystride, int _w, int _h) { 426 fs_ctx ctx; 427 double ret; 428 int l; 429 ret = 1; 430 fs_ctx_init(&ctx, _w, _h, FS_NLEVELS); 431 fs_downsample_level0(&ctx, _src, _systride, _dst, _dystride, _w, _h); 432 for (l = 0; l < FS_NLEVELS - 1; l++) { 433 fs_calc_structure(&ctx, l); 434 ret *= fs_average(&ctx, l); 435 fs_downsample_level(&ctx, l + 1); 436 } 437 fs_calc_structure(&ctx, l); 438 fs_apply_luminance(&ctx, l); 439 ret *= fs_average(&ctx, l); 440 fs_ctx_clear(&ctx); 441 return ret; 442} 443 444static double convert_ssim_db(double _ssim, double _weight) { 445 return 10 * (log10(_weight) - log10(_weight - _ssim)); 446} 447 448double vpx_calc_fastssim(const YV12_BUFFER_CONFIG *source, 449 const YV12_BUFFER_CONFIG *dest, 450 double *ssim_y, double *ssim_u, double *ssim_v) { 451 double ssimv; 452 vpx_clear_system_state(); 453 454 *ssim_y = calc_ssim(source->y_buffer, source->y_stride, dest->y_buffer, 455 dest->y_stride, source->y_crop_width, 456 source->y_crop_height); 457 458 *ssim_u = calc_ssim(source->u_buffer, source->uv_stride, dest->u_buffer, 459 dest->uv_stride, source->uv_crop_width, 460 source->uv_crop_height); 461 462 *ssim_v = calc_ssim(source->v_buffer, source->uv_stride, dest->v_buffer, 463 dest->uv_stride, source->uv_crop_width, 464 source->uv_crop_height); 465 ssimv = (*ssim_y) * .8 + .1 * ((*ssim_u) + (*ssim_v)); 466 467 return convert_ssim_db(ssimv, 1.0); 468} 469