1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "./vpx_dsp_rtcd.h" 12 13#include "vpx_config.h" 14#include "vp8_rtcd.h" 15#include "encodemb.h" 16#include "vp8/common/reconinter.h" 17#include "vp8/encoder/quantize.h" 18#include "tokenize.h" 19#include "vp8/common/invtrans.h" 20#include "vpx_mem/vpx_mem.h" 21#include "rdopt.h" 22 23void vp8_subtract_b(BLOCK *be, BLOCKD *bd, int pitch) { 24 unsigned char *src_ptr = (*(be->base_src) + be->src); 25 short *diff_ptr = be->src_diff; 26 unsigned char *pred_ptr = bd->predictor; 27 int src_stride = be->src_stride; 28 29 vpx_subtract_block(4, 4, diff_ptr, pitch, src_ptr, src_stride, 30 pred_ptr, pitch); 31} 32 33void vp8_subtract_mbuv(short *diff, unsigned char *usrc, unsigned char *vsrc, 34 int src_stride, unsigned char *upred, 35 unsigned char *vpred, int pred_stride) { 36 short *udiff = diff + 256; 37 short *vdiff = diff + 320; 38 39 vpx_subtract_block(8, 8, udiff, 8, usrc, src_stride, upred, pred_stride); 40 vpx_subtract_block(8, 8, vdiff, 8, vsrc, src_stride, vpred, pred_stride); 41} 42 43void vp8_subtract_mby(short *diff, unsigned char *src, int src_stride, 44 unsigned char *pred, int pred_stride) { 45 vpx_subtract_block(16, 16, diff, 16, src, src_stride, pred, pred_stride); 46} 47 48static void vp8_subtract_mb(MACROBLOCK *x) 49{ 50 BLOCK *b = &x->block[0]; 51 52 vp8_subtract_mby(x->src_diff, *(b->base_src), 53 b->src_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride); 54 vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, 55 x->src.v_buffer, x->src.uv_stride, x->e_mbd.dst.u_buffer, 56 x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride); 57} 58 59static void build_dcblock(MACROBLOCK *x) 60{ 61 short *src_diff_ptr = &x->src_diff[384]; 62 int i; 63 64 for (i = 0; i < 16; i++) 65 { 66 src_diff_ptr[i] = x->coeff[i * 16]; 67 } 68} 69 70void vp8_transform_mbuv(MACROBLOCK *x) 71{ 72 int i; 73 74 for (i = 16; i < 24; i += 2) 75 { 76 x->short_fdct8x4(&x->block[i].src_diff[0], 77 &x->block[i].coeff[0], 16); 78 } 79} 80 81 82void vp8_transform_intra_mby(MACROBLOCK *x) 83{ 84 int i; 85 86 for (i = 0; i < 16; i += 2) 87 { 88 x->short_fdct8x4(&x->block[i].src_diff[0], 89 &x->block[i].coeff[0], 32); 90 } 91 92 /* build dc block from 16 y dc values */ 93 build_dcblock(x); 94 95 /* do 2nd order transform on the dc block */ 96 x->short_walsh4x4(&x->block[24].src_diff[0], 97 &x->block[24].coeff[0], 8); 98 99} 100 101 102static void transform_mb(MACROBLOCK *x) 103{ 104 int i; 105 106 for (i = 0; i < 16; i += 2) 107 { 108 x->short_fdct8x4(&x->block[i].src_diff[0], 109 &x->block[i].coeff[0], 32); 110 } 111 112 /* build dc block from 16 y dc values */ 113 if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) 114 build_dcblock(x); 115 116 for (i = 16; i < 24; i += 2) 117 { 118 x->short_fdct8x4(&x->block[i].src_diff[0], 119 &x->block[i].coeff[0], 16); 120 } 121 122 /* do 2nd order transform on the dc block */ 123 if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) 124 x->short_walsh4x4(&x->block[24].src_diff[0], 125 &x->block[24].coeff[0], 8); 126 127} 128 129 130static void transform_mby(MACROBLOCK *x) 131{ 132 int i; 133 134 for (i = 0; i < 16; i += 2) 135 { 136 x->short_fdct8x4(&x->block[i].src_diff[0], 137 &x->block[i].coeff[0], 32); 138 } 139 140 /* build dc block from 16 y dc values */ 141 if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) 142 { 143 build_dcblock(x); 144 x->short_walsh4x4(&x->block[24].src_diff[0], 145 &x->block[24].coeff[0], 8); 146 } 147} 148 149 150 151#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) 152 153typedef struct vp8_token_state vp8_token_state; 154 155struct vp8_token_state{ 156 int rate; 157 int error; 158 signed char next; 159 signed char token; 160 short qc; 161}; 162 163/* TODO: experiments to find optimal multiple numbers */ 164#define Y1_RD_MULT 4 165#define UV_RD_MULT 2 166#define Y2_RD_MULT 16 167 168static const int plane_rd_mult[4]= 169{ 170 Y1_RD_MULT, 171 Y2_RD_MULT, 172 UV_RD_MULT, 173 Y1_RD_MULT 174}; 175 176static void optimize_b(MACROBLOCK *mb, int ib, int type, 177 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) 178{ 179 BLOCK *b; 180 BLOCKD *d; 181 vp8_token_state tokens[17][2]; 182 unsigned best_mask[2]; 183 const short *dequant_ptr; 184 const short *coeff_ptr; 185 short *qcoeff_ptr; 186 short *dqcoeff_ptr; 187 int eob; 188 int i0; 189 int rc; 190 int x; 191 int sz = 0; 192 int next; 193 int rdmult; 194 int rddiv; 195 int final_eob; 196 int rd_cost0; 197 int rd_cost1; 198 int rate0; 199 int rate1; 200 int error0; 201 int error1; 202 int t0; 203 int t1; 204 int best; 205 int band; 206 int pt; 207 int i; 208 int err_mult = plane_rd_mult[type]; 209 210 b = &mb->block[ib]; 211 d = &mb->e_mbd.block[ib]; 212 213 dequant_ptr = d->dequant; 214 coeff_ptr = b->coeff; 215 qcoeff_ptr = d->qcoeff; 216 dqcoeff_ptr = d->dqcoeff; 217 i0 = !type; 218 eob = *d->eob; 219 220 /* Now set up a Viterbi trellis to evaluate alternative roundings. */ 221 rdmult = mb->rdmult * err_mult; 222 if(mb->e_mbd.mode_info_context->mbmi.ref_frame==INTRA_FRAME) 223 rdmult = (rdmult * 9)>>4; 224 225 rddiv = mb->rddiv; 226 best_mask[0] = best_mask[1] = 0; 227 /* Initialize the sentinel node of the trellis. */ 228 tokens[eob][0].rate = 0; 229 tokens[eob][0].error = 0; 230 tokens[eob][0].next = 16; 231 tokens[eob][0].token = DCT_EOB_TOKEN; 232 tokens[eob][0].qc = 0; 233 *(tokens[eob] + 1) = *(tokens[eob] + 0); 234 next = eob; 235 for (i = eob; i-- > i0;) 236 { 237 int base_bits; 238 int d2; 239 int dx; 240 241 rc = vp8_default_zig_zag1d[i]; 242 x = qcoeff_ptr[rc]; 243 /* Only add a trellis state for non-zero coefficients. */ 244 if (x) 245 { 246 int shortcut=0; 247 error0 = tokens[next][0].error; 248 error1 = tokens[next][1].error; 249 /* Evaluate the first possibility for this state. */ 250 rate0 = tokens[next][0].rate; 251 rate1 = tokens[next][1].rate; 252 t0 = (vp8_dct_value_tokens_ptr + x)->Token; 253 /* Consider both possible successor states. */ 254 if (next < 16) 255 { 256 band = vp8_coef_bands[i + 1]; 257 pt = vp8_prev_token_class[t0]; 258 rate0 += 259 mb->token_costs[type][band][pt][tokens[next][0].token]; 260 rate1 += 261 mb->token_costs[type][band][pt][tokens[next][1].token]; 262 } 263 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); 264 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); 265 if (rd_cost0 == rd_cost1) 266 { 267 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); 268 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); 269 } 270 /* And pick the best. */ 271 best = rd_cost1 < rd_cost0; 272 base_bits = *(vp8_dct_value_cost_ptr + x); 273 dx = dqcoeff_ptr[rc] - coeff_ptr[rc]; 274 d2 = dx*dx; 275 tokens[i][0].rate = base_bits + (best ? rate1 : rate0); 276 tokens[i][0].error = d2 + (best ? error1 : error0); 277 tokens[i][0].next = next; 278 tokens[i][0].token = t0; 279 tokens[i][0].qc = x; 280 best_mask[0] |= best << i; 281 /* Evaluate the second possibility for this state. */ 282 rate0 = tokens[next][0].rate; 283 rate1 = tokens[next][1].rate; 284 285 if((abs(x)*dequant_ptr[rc]>abs(coeff_ptr[rc])) && 286 (abs(x)*dequant_ptr[rc]<abs(coeff_ptr[rc])+dequant_ptr[rc])) 287 shortcut = 1; 288 else 289 shortcut = 0; 290 291 if(shortcut) 292 { 293 sz = -(x < 0); 294 x -= 2*sz + 1; 295 } 296 297 /* Consider both possible successor states. */ 298 if (!x) 299 { 300 /* If we reduced this coefficient to zero, check to see if 301 * we need to move the EOB back here. 302 */ 303 t0 = tokens[next][0].token == DCT_EOB_TOKEN ? 304 DCT_EOB_TOKEN : ZERO_TOKEN; 305 t1 = tokens[next][1].token == DCT_EOB_TOKEN ? 306 DCT_EOB_TOKEN : ZERO_TOKEN; 307 } 308 else 309 { 310 t0=t1 = (vp8_dct_value_tokens_ptr + x)->Token; 311 } 312 if (next < 16) 313 { 314 band = vp8_coef_bands[i + 1]; 315 if(t0!=DCT_EOB_TOKEN) 316 { 317 pt = vp8_prev_token_class[t0]; 318 rate0 += mb->token_costs[type][band][pt][ 319 tokens[next][0].token]; 320 } 321 if(t1!=DCT_EOB_TOKEN) 322 { 323 pt = vp8_prev_token_class[t1]; 324 rate1 += mb->token_costs[type][band][pt][ 325 tokens[next][1].token]; 326 } 327 } 328 329 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); 330 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); 331 if (rd_cost0 == rd_cost1) 332 { 333 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); 334 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); 335 } 336 /* And pick the best. */ 337 best = rd_cost1 < rd_cost0; 338 base_bits = *(vp8_dct_value_cost_ptr + x); 339 340 if(shortcut) 341 { 342 dx -= (dequant_ptr[rc] + sz) ^ sz; 343 d2 = dx*dx; 344 } 345 tokens[i][1].rate = base_bits + (best ? rate1 : rate0); 346 tokens[i][1].error = d2 + (best ? error1 : error0); 347 tokens[i][1].next = next; 348 tokens[i][1].token =best?t1:t0; 349 tokens[i][1].qc = x; 350 best_mask[1] |= best << i; 351 /* Finally, make this the new head of the trellis. */ 352 next = i; 353 } 354 /* There's no choice to make for a zero coefficient, so we don't 355 * add a new trellis node, but we do need to update the costs. 356 */ 357 else 358 { 359 band = vp8_coef_bands[i + 1]; 360 t0 = tokens[next][0].token; 361 t1 = tokens[next][1].token; 362 /* Update the cost of each path if we're past the EOB token. */ 363 if (t0 != DCT_EOB_TOKEN) 364 { 365 tokens[next][0].rate += mb->token_costs[type][band][0][t0]; 366 tokens[next][0].token = ZERO_TOKEN; 367 } 368 if (t1 != DCT_EOB_TOKEN) 369 { 370 tokens[next][1].rate += mb->token_costs[type][band][0][t1]; 371 tokens[next][1].token = ZERO_TOKEN; 372 } 373 /* Don't update next, because we didn't add a new node. */ 374 } 375 } 376 377 /* Now pick the best path through the whole trellis. */ 378 band = vp8_coef_bands[i + 1]; 379 VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); 380 rate0 = tokens[next][0].rate; 381 rate1 = tokens[next][1].rate; 382 error0 = tokens[next][0].error; 383 error1 = tokens[next][1].error; 384 t0 = tokens[next][0].token; 385 t1 = tokens[next][1].token; 386 rate0 += mb->token_costs[type][band][pt][t0]; 387 rate1 += mb->token_costs[type][band][pt][t1]; 388 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); 389 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); 390 if (rd_cost0 == rd_cost1) 391 { 392 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); 393 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); 394 } 395 best = rd_cost1 < rd_cost0; 396 final_eob = i0 - 1; 397 for (i = next; i < eob; i = next) 398 { 399 x = tokens[i][best].qc; 400 if (x) 401 final_eob = i; 402 rc = vp8_default_zig_zag1d[i]; 403 qcoeff_ptr[rc] = x; 404 dqcoeff_ptr[rc] = x * dequant_ptr[rc]; 405 next = tokens[i][best].next; 406 best = (best_mask[best] >> i) & 1; 407 } 408 final_eob++; 409 410 *a = *l = (final_eob != !type); 411 *d->eob = (char)final_eob; 412} 413static void check_reset_2nd_coeffs(MACROBLOCKD *x, int type, 414 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) 415{ 416 int sum=0; 417 int i; 418 BLOCKD *bd = &x->block[24]; 419 420 if(bd->dequant[0]>=35 && bd->dequant[1]>=35) 421 return; 422 423 for(i=0;i<(*bd->eob);i++) 424 { 425 int coef = bd->dqcoeff[vp8_default_zig_zag1d[i]]; 426 sum+= (coef>=0)?coef:-coef; 427 if(sum>=35) 428 return; 429 } 430 /************************************************************************** 431 our inverse hadamard transform effectively is weighted sum of all 16 inputs 432 with weight either 1 or -1. It has a last stage scaling of (sum+3)>>3. And 433 dc only idct is (dc+4)>>3. So if all the sums are between -35 and 29, the 434 output after inverse wht and idct will be all zero. A sum of absolute value 435 smaller than 35 guarantees all 16 different (+1/-1) weighted sums in wht 436 fall between -35 and +35. 437 **************************************************************************/ 438 if(sum < 35) 439 { 440 for(i=0;i<(*bd->eob);i++) 441 { 442 int rc = vp8_default_zig_zag1d[i]; 443 bd->qcoeff[rc]=0; 444 bd->dqcoeff[rc]=0; 445 } 446 *bd->eob = 0; 447 *a = *l = (*bd->eob != !type); 448 } 449} 450 451static void optimize_mb(MACROBLOCK *x) 452{ 453 int b; 454 int type; 455 int has_2nd_order; 456 457 ENTROPY_CONTEXT_PLANES t_above, t_left; 458 ENTROPY_CONTEXT *ta; 459 ENTROPY_CONTEXT *tl; 460 461 memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); 462 memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); 463 464 ta = (ENTROPY_CONTEXT *)&t_above; 465 tl = (ENTROPY_CONTEXT *)&t_left; 466 467 has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED 468 && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); 469 type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; 470 471 for (b = 0; b < 16; b++) 472 { 473 optimize_b(x, b, type, 474 ta + vp8_block2above[b], tl + vp8_block2left[b]); 475 } 476 477 for (b = 16; b < 24; b++) 478 { 479 optimize_b(x, b, PLANE_TYPE_UV, 480 ta + vp8_block2above[b], tl + vp8_block2left[b]); 481 } 482 483 if (has_2nd_order) 484 { 485 b=24; 486 optimize_b(x, b, PLANE_TYPE_Y2, 487 ta + vp8_block2above[b], tl + vp8_block2left[b]); 488 check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, 489 ta + vp8_block2above[b], tl + vp8_block2left[b]); 490 } 491} 492 493 494void vp8_optimize_mby(MACROBLOCK *x) 495{ 496 int b; 497 int type; 498 int has_2nd_order; 499 500 ENTROPY_CONTEXT_PLANES t_above, t_left; 501 ENTROPY_CONTEXT *ta; 502 ENTROPY_CONTEXT *tl; 503 504 if (!x->e_mbd.above_context) 505 return; 506 507 if (!x->e_mbd.left_context) 508 return; 509 510 memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); 511 memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); 512 513 ta = (ENTROPY_CONTEXT *)&t_above; 514 tl = (ENTROPY_CONTEXT *)&t_left; 515 516 has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED 517 && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); 518 type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; 519 520 for (b = 0; b < 16; b++) 521 { 522 optimize_b(x, b, type, 523 ta + vp8_block2above[b], tl + vp8_block2left[b]); 524 } 525 526 527 if (has_2nd_order) 528 { 529 b=24; 530 optimize_b(x, b, PLANE_TYPE_Y2, 531 ta + vp8_block2above[b], tl + vp8_block2left[b]); 532 check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, 533 ta + vp8_block2above[b], tl + vp8_block2left[b]); 534 } 535} 536 537void vp8_optimize_mbuv(MACROBLOCK *x) 538{ 539 int b; 540 ENTROPY_CONTEXT_PLANES t_above, t_left; 541 ENTROPY_CONTEXT *ta; 542 ENTROPY_CONTEXT *tl; 543 544 if (!x->e_mbd.above_context) 545 return; 546 547 if (!x->e_mbd.left_context) 548 return; 549 550 memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); 551 memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); 552 553 ta = (ENTROPY_CONTEXT *)&t_above; 554 tl = (ENTROPY_CONTEXT *)&t_left; 555 556 for (b = 16; b < 24; b++) 557 { 558 optimize_b(x, b, PLANE_TYPE_UV, 559 ta + vp8_block2above[b], tl + vp8_block2left[b]); 560 } 561} 562 563void vp8_encode_inter16x16(MACROBLOCK *x) 564{ 565 vp8_build_inter_predictors_mb(&x->e_mbd); 566 567 vp8_subtract_mb(x); 568 569 transform_mb(x); 570 571 vp8_quantize_mb(x); 572 573 if (x->optimize) 574 optimize_mb(x); 575} 576 577/* this funciton is used by first pass only */ 578void vp8_encode_inter16x16y(MACROBLOCK *x) 579{ 580 BLOCK *b = &x->block[0]; 581 582 vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.dst.y_buffer, 583 x->e_mbd.dst.y_stride); 584 585 vp8_subtract_mby(x->src_diff, *(b->base_src), 586 b->src_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride); 587 588 transform_mby(x); 589 590 vp8_quantize_mby(x); 591 592 vp8_inverse_transform_mby(&x->e_mbd); 593} 594