1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12#include "vpx_config.h" 13#include "vp8_rtcd.h" 14#include "encodemb.h" 15#include "vp8/common/reconinter.h" 16#include "quantize.h" 17#include "tokenize.h" 18#include "vp8/common/invtrans.h" 19#include "vpx_mem/vpx_mem.h" 20#include "rdopt.h" 21 22void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) 23{ 24 unsigned char *src_ptr = (*(be->base_src) + be->src); 25 short *diff_ptr = be->src_diff; 26 unsigned char *pred_ptr = bd->predictor; 27 int src_stride = be->src_stride; 28 29 int r, c; 30 31 for (r = 0; r < 4; r++) 32 { 33 for (c = 0; c < 4; c++) 34 { 35 diff_ptr[c] = src_ptr[c] - pred_ptr[c]; 36 } 37 38 diff_ptr += pitch; 39 pred_ptr += pitch; 40 src_ptr += src_stride; 41 } 42} 43 44void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, 45 int src_stride, unsigned char *upred, 46 unsigned char *vpred, int pred_stride) 47{ 48 short *udiff = diff + 256; 49 short *vdiff = diff + 320; 50 51 int r, c; 52 53 for (r = 0; r < 8; r++) 54 { 55 for (c = 0; c < 8; c++) 56 { 57 udiff[c] = usrc[c] - upred[c]; 58 } 59 60 udiff += 8; 61 upred += pred_stride; 62 usrc += src_stride; 63 } 64 65 for (r = 0; r < 8; r++) 66 { 67 for (c = 0; c < 8; c++) 68 { 69 vdiff[c] = vsrc[c] - vpred[c]; 70 } 71 72 vdiff += 8; 73 vpred += pred_stride; 74 vsrc += src_stride; 75 } 76} 77 78void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, 79 unsigned char *pred, int pred_stride) 80{ 81 int r, c; 82 83 for (r = 0; r < 16; r++) 84 { 85 for (c = 0; c < 16; c++) 86 { 87 diff[c] = src[c] - pred[c]; 88 } 89 90 diff += 16; 91 pred += pred_stride; 92 src += src_stride; 93 } 94} 95 96static void vp8_subtract_mb(MACROBLOCK *x) 97{ 98 BLOCK *b = &x->block[0]; 99 100 vp8_subtract_mby(x->src_diff, *(b->base_src), 101 b->src_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride); 102 vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, 103 x->src.v_buffer, x->src.uv_stride, x->e_mbd.dst.u_buffer, 104 x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride); 105} 106 107static void build_dcblock(MACROBLOCK *x) 108{ 109 short *src_diff_ptr = &x->src_diff[384]; 110 int i; 111 112 for (i = 0; i < 16; i++) 113 { 114 src_diff_ptr[i] = x->coeff[i * 16]; 115 } 116} 117 118void vp8_transform_mbuv(MACROBLOCK *x) 119{ 120 int i; 121 122 for (i = 16; i < 24; i += 2) 123 { 124 x->short_fdct8x4(&x->block[i].src_diff[0], 125 &x->block[i].coeff[0], 16); 126 } 127} 128 129 130void vp8_transform_intra_mby(MACROBLOCK *x) 131{ 132 int i; 133 134 for (i = 0; i < 16; i += 2) 135 { 136 x->short_fdct8x4(&x->block[i].src_diff[0], 137 &x->block[i].coeff[0], 32); 138 } 139 140 /* build dc block from 16 y dc values */ 141 build_dcblock(x); 142 143 /* do 2nd order transform on the dc block */ 144 x->short_walsh4x4(&x->block[24].src_diff[0], 145 &x->block[24].coeff[0], 8); 146 147} 148 149 150static void transform_mb(MACROBLOCK *x) 151{ 152 int i; 153 154 for (i = 0; i < 16; i += 2) 155 { 156 x->short_fdct8x4(&x->block[i].src_diff[0], 157 &x->block[i].coeff[0], 32); 158 } 159 160 /* build dc block from 16 y dc values */ 161 if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) 162 build_dcblock(x); 163 164 for (i = 16; i < 24; i += 2) 165 { 166 x->short_fdct8x4(&x->block[i].src_diff[0], 167 &x->block[i].coeff[0], 16); 168 } 169 170 /* do 2nd order transform on the dc block */ 171 if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) 172 x->short_walsh4x4(&x->block[24].src_diff[0], 173 &x->block[24].coeff[0], 8); 174 175} 176 177 178static void transform_mby(MACROBLOCK *x) 179{ 180 int i; 181 182 for (i = 0; i < 16; i += 2) 183 { 184 x->short_fdct8x4(&x->block[i].src_diff[0], 185 &x->block[i].coeff[0], 32); 186 } 187 188 /* build dc block from 16 y dc values */ 189 if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) 190 { 191 build_dcblock(x); 192 x->short_walsh4x4(&x->block[24].src_diff[0], 193 &x->block[24].coeff[0], 8); 194 } 195} 196 197 198 199#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) 200 201typedef struct vp8_token_state vp8_token_state; 202 203struct vp8_token_state{ 204 int rate; 205 int error; 206 signed char next; 207 signed char token; 208 short qc; 209}; 210 211/* TODO: experiments to find optimal multiple numbers */ 212#define Y1_RD_MULT 4 213#define UV_RD_MULT 2 214#define Y2_RD_MULT 16 215 216static const int plane_rd_mult[4]= 217{ 218 Y1_RD_MULT, 219 Y2_RD_MULT, 220 UV_RD_MULT, 221 Y1_RD_MULT 222}; 223 224static void optimize_b(MACROBLOCK *mb, int ib, int type, 225 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) 226{ 227 BLOCK *b; 228 BLOCKD *d; 229 vp8_token_state tokens[17][2]; 230 unsigned best_mask[2]; 231 const short *dequant_ptr; 232 const short *coeff_ptr; 233 short *qcoeff_ptr; 234 short *dqcoeff_ptr; 235 int eob; 236 int i0; 237 int rc; 238 int x; 239 int sz = 0; 240 int next; 241 int rdmult; 242 int rddiv; 243 int final_eob; 244 int rd_cost0; 245 int rd_cost1; 246 int rate0; 247 int rate1; 248 int error0; 249 int error1; 250 int t0; 251 int t1; 252 int best; 253 int band; 254 int pt; 255 int i; 256 int err_mult = plane_rd_mult[type]; 257 258 b = &mb->block[ib]; 259 d = &mb->e_mbd.block[ib]; 260 261 dequant_ptr = d->dequant; 262 coeff_ptr = b->coeff; 263 qcoeff_ptr = d->qcoeff; 264 dqcoeff_ptr = d->dqcoeff; 265 i0 = !type; 266 eob = *d->eob; 267 268 /* Now set up a Viterbi trellis to evaluate alternative roundings. */ 269 rdmult = mb->rdmult * err_mult; 270 if(mb->e_mbd.mode_info_context->mbmi.ref_frame==INTRA_FRAME) 271 rdmult = (rdmult * 9)>>4; 272 273 rddiv = mb->rddiv; 274 best_mask[0] = best_mask[1] = 0; 275 /* Initialize the sentinel node of the trellis. */ 276 tokens[eob][0].rate = 0; 277 tokens[eob][0].error = 0; 278 tokens[eob][0].next = 16; 279 tokens[eob][0].token = DCT_EOB_TOKEN; 280 tokens[eob][0].qc = 0; 281 *(tokens[eob] + 1) = *(tokens[eob] + 0); 282 next = eob; 283 for (i = eob; i-- > i0;) 284 { 285 int base_bits; 286 int d2; 287 int dx; 288 289 rc = vp8_default_zig_zag1d[i]; 290 x = qcoeff_ptr[rc]; 291 /* Only add a trellis state for non-zero coefficients. */ 292 if (x) 293 { 294 int shortcut=0; 295 error0 = tokens[next][0].error; 296 error1 = tokens[next][1].error; 297 /* Evaluate the first possibility for this state. */ 298 rate0 = tokens[next][0].rate; 299 rate1 = tokens[next][1].rate; 300 t0 = (vp8_dct_value_tokens_ptr + x)->Token; 301 /* Consider both possible successor states. */ 302 if (next < 16) 303 { 304 band = vp8_coef_bands[i + 1]; 305 pt = vp8_prev_token_class[t0]; 306 rate0 += 307 mb->token_costs[type][band][pt][tokens[next][0].token]; 308 rate1 += 309 mb->token_costs[type][band][pt][tokens[next][1].token]; 310 } 311 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); 312 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); 313 if (rd_cost0 == rd_cost1) 314 { 315 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); 316 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); 317 } 318 /* And pick the best. */ 319 best = rd_cost1 < rd_cost0; 320 base_bits = *(vp8_dct_value_cost_ptr + x); 321 dx = dqcoeff_ptr[rc] - coeff_ptr[rc]; 322 d2 = dx*dx; 323 tokens[i][0].rate = base_bits + (best ? rate1 : rate0); 324 tokens[i][0].error = d2 + (best ? error1 : error0); 325 tokens[i][0].next = next; 326 tokens[i][0].token = t0; 327 tokens[i][0].qc = x; 328 best_mask[0] |= best << i; 329 /* Evaluate the second possibility for this state. */ 330 rate0 = tokens[next][0].rate; 331 rate1 = tokens[next][1].rate; 332 333 if((abs(x)*dequant_ptr[rc]>abs(coeff_ptr[rc])) && 334 (abs(x)*dequant_ptr[rc]<abs(coeff_ptr[rc])+dequant_ptr[rc])) 335 shortcut = 1; 336 else 337 shortcut = 0; 338 339 if(shortcut) 340 { 341 sz = -(x < 0); 342 x -= 2*sz + 1; 343 } 344 345 /* Consider both possible successor states. */ 346 if (!x) 347 { 348 /* If we reduced this coefficient to zero, check to see if 349 * we need to move the EOB back here. 350 */ 351 t0 = tokens[next][0].token == DCT_EOB_TOKEN ? 352 DCT_EOB_TOKEN : ZERO_TOKEN; 353 t1 = tokens[next][1].token == DCT_EOB_TOKEN ? 354 DCT_EOB_TOKEN : ZERO_TOKEN; 355 } 356 else 357 { 358 t0=t1 = (vp8_dct_value_tokens_ptr + x)->Token; 359 } 360 if (next < 16) 361 { 362 band = vp8_coef_bands[i + 1]; 363 if(t0!=DCT_EOB_TOKEN) 364 { 365 pt = vp8_prev_token_class[t0]; 366 rate0 += mb->token_costs[type][band][pt][ 367 tokens[next][0].token]; 368 } 369 if(t1!=DCT_EOB_TOKEN) 370 { 371 pt = vp8_prev_token_class[t1]; 372 rate1 += mb->token_costs[type][band][pt][ 373 tokens[next][1].token]; 374 } 375 } 376 377 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); 378 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); 379 if (rd_cost0 == rd_cost1) 380 { 381 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); 382 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); 383 } 384 /* And pick the best. */ 385 best = rd_cost1 < rd_cost0; 386 base_bits = *(vp8_dct_value_cost_ptr + x); 387 388 if(shortcut) 389 { 390 dx -= (dequant_ptr[rc] + sz) ^ sz; 391 d2 = dx*dx; 392 } 393 tokens[i][1].rate = base_bits + (best ? rate1 : rate0); 394 tokens[i][1].error = d2 + (best ? error1 : error0); 395 tokens[i][1].next = next; 396 tokens[i][1].token =best?t1:t0; 397 tokens[i][1].qc = x; 398 best_mask[1] |= best << i; 399 /* Finally, make this the new head of the trellis. */ 400 next = i; 401 } 402 /* There's no choice to make for a zero coefficient, so we don't 403 * add a new trellis node, but we do need to update the costs. 404 */ 405 else 406 { 407 band = vp8_coef_bands[i + 1]; 408 t0 = tokens[next][0].token; 409 t1 = tokens[next][1].token; 410 /* Update the cost of each path if we're past the EOB token. */ 411 if (t0 != DCT_EOB_TOKEN) 412 { 413 tokens[next][0].rate += mb->token_costs[type][band][0][t0]; 414 tokens[next][0].token = ZERO_TOKEN; 415 } 416 if (t1 != DCT_EOB_TOKEN) 417 { 418 tokens[next][1].rate += mb->token_costs[type][band][0][t1]; 419 tokens[next][1].token = ZERO_TOKEN; 420 } 421 /* Don't update next, because we didn't add a new node. */ 422 } 423 } 424 425 /* Now pick the best path through the whole trellis. */ 426 band = vp8_coef_bands[i + 1]; 427 VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); 428 rate0 = tokens[next][0].rate; 429 rate1 = tokens[next][1].rate; 430 error0 = tokens[next][0].error; 431 error1 = tokens[next][1].error; 432 t0 = tokens[next][0].token; 433 t1 = tokens[next][1].token; 434 rate0 += mb->token_costs[type][band][pt][t0]; 435 rate1 += mb->token_costs[type][band][pt][t1]; 436 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); 437 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); 438 if (rd_cost0 == rd_cost1) 439 { 440 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); 441 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); 442 } 443 best = rd_cost1 < rd_cost0; 444 final_eob = i0 - 1; 445 for (i = next; i < eob; i = next) 446 { 447 x = tokens[i][best].qc; 448 if (x) 449 final_eob = i; 450 rc = vp8_default_zig_zag1d[i]; 451 qcoeff_ptr[rc] = x; 452 dqcoeff_ptr[rc] = x * dequant_ptr[rc]; 453 next = tokens[i][best].next; 454 best = (best_mask[best] >> i) & 1; 455 } 456 final_eob++; 457 458 *a = *l = (final_eob != !type); 459 *d->eob = (char)final_eob; 460} 461static void check_reset_2nd_coeffs(MACROBLOCKD *x, int type, 462 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) 463{ 464 int sum=0; 465 int i; 466 BLOCKD *bd = &x->block[24]; 467 468 if(bd->dequant[0]>=35 && bd->dequant[1]>=35) 469 return; 470 471 for(i=0;i<(*bd->eob);i++) 472 { 473 int coef = bd->dqcoeff[vp8_default_zig_zag1d[i]]; 474 sum+= (coef>=0)?coef:-coef; 475 if(sum>=35) 476 return; 477 } 478 /************************************************************************** 479 our inverse hadamard transform effectively is weighted sum of all 16 inputs 480 with weight either 1 or -1. It has a last stage scaling of (sum+3)>>3. And 481 dc only idct is (dc+4)>>3. So if all the sums are between -35 and 29, the 482 output after inverse wht and idct will be all zero. A sum of absolute value 483 smaller than 35 guarantees all 16 different (+1/-1) weighted sums in wht 484 fall between -35 and +35. 485 **************************************************************************/ 486 if(sum < 35) 487 { 488 for(i=0;i<(*bd->eob);i++) 489 { 490 int rc = vp8_default_zig_zag1d[i]; 491 bd->qcoeff[rc]=0; 492 bd->dqcoeff[rc]=0; 493 } 494 *bd->eob = 0; 495 *a = *l = (*bd->eob != !type); 496 } 497} 498 499static void optimize_mb(MACROBLOCK *x) 500{ 501 int b; 502 int type; 503 int has_2nd_order; 504 505 ENTROPY_CONTEXT_PLANES t_above, t_left; 506 ENTROPY_CONTEXT *ta; 507 ENTROPY_CONTEXT *tl; 508 509 vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); 510 vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); 511 512 ta = (ENTROPY_CONTEXT *)&t_above; 513 tl = (ENTROPY_CONTEXT *)&t_left; 514 515 has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED 516 && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); 517 type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; 518 519 for (b = 0; b < 16; b++) 520 { 521 optimize_b(x, b, type, 522 ta + vp8_block2above[b], tl + vp8_block2left[b]); 523 } 524 525 for (b = 16; b < 24; b++) 526 { 527 optimize_b(x, b, PLANE_TYPE_UV, 528 ta + vp8_block2above[b], tl + vp8_block2left[b]); 529 } 530 531 if (has_2nd_order) 532 { 533 b=24; 534 optimize_b(x, b, PLANE_TYPE_Y2, 535 ta + vp8_block2above[b], tl + vp8_block2left[b]); 536 check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, 537 ta + vp8_block2above[b], tl + vp8_block2left[b]); 538 } 539} 540 541 542void vp8_optimize_mby(MACROBLOCK *x) 543{ 544 int b; 545 int type; 546 int has_2nd_order; 547 548 ENTROPY_CONTEXT_PLANES t_above, t_left; 549 ENTROPY_CONTEXT *ta; 550 ENTROPY_CONTEXT *tl; 551 552 if (!x->e_mbd.above_context) 553 return; 554 555 if (!x->e_mbd.left_context) 556 return; 557 558 vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); 559 vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); 560 561 ta = (ENTROPY_CONTEXT *)&t_above; 562 tl = (ENTROPY_CONTEXT *)&t_left; 563 564 has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED 565 && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); 566 type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; 567 568 for (b = 0; b < 16; b++) 569 { 570 optimize_b(x, b, type, 571 ta + vp8_block2above[b], tl + vp8_block2left[b]); 572 } 573 574 575 if (has_2nd_order) 576 { 577 b=24; 578 optimize_b(x, b, PLANE_TYPE_Y2, 579 ta + vp8_block2above[b], tl + vp8_block2left[b]); 580 check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, 581 ta + vp8_block2above[b], tl + vp8_block2left[b]); 582 } 583} 584 585void vp8_optimize_mbuv(MACROBLOCK *x) 586{ 587 int b; 588 ENTROPY_CONTEXT_PLANES t_above, t_left; 589 ENTROPY_CONTEXT *ta; 590 ENTROPY_CONTEXT *tl; 591 592 if (!x->e_mbd.above_context) 593 return; 594 595 if (!x->e_mbd.left_context) 596 return; 597 598 vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); 599 vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); 600 601 ta = (ENTROPY_CONTEXT *)&t_above; 602 tl = (ENTROPY_CONTEXT *)&t_left; 603 604 for (b = 16; b < 24; b++) 605 { 606 optimize_b(x, b, PLANE_TYPE_UV, 607 ta + vp8_block2above[b], tl + vp8_block2left[b]); 608 } 609} 610 611void vp8_encode_inter16x16(MACROBLOCK *x) 612{ 613 vp8_build_inter_predictors_mb(&x->e_mbd); 614 615 vp8_subtract_mb(x); 616 617 transform_mb(x); 618 619 vp8_quantize_mb(x); 620 621 if (x->optimize) 622 optimize_mb(x); 623} 624 625/* this funciton is used by first pass only */ 626void vp8_encode_inter16x16y(MACROBLOCK *x) 627{ 628 BLOCK *b = &x->block[0]; 629 630 vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.dst.y_buffer, 631 x->e_mbd.dst.y_stride); 632 633 vp8_subtract_mby(x->src_diff, *(b->base_src), 634 b->src_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride); 635 636 transform_mby(x); 637 638 vp8_quantize_mby(x); 639 640 vp8_inverse_transform_mby(&x->e_mbd); 641} 642