1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12#include "./vp9_rtcd.h" 13#include "./vpx_config.h" 14 15#include "vpx_mem/vpx_mem.h" 16 17#include "vp9/common/vp9_idct.h" 18#include "vp9/common/vp9_reconinter.h" 19#include "vp9/common/vp9_reconintra.h" 20#include "vp9/common/vp9_systemdependent.h" 21 22#include "vp9/encoder/vp9_encodemb.h" 23#include "vp9/encoder/vp9_quantize.h" 24#include "vp9/encoder/vp9_rd.h" 25#include "vp9/encoder/vp9_tokenize.h" 26 27struct optimize_ctx { 28 ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; 29 ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; 30}; 31 32struct encode_b_args { 33 MACROBLOCK *x; 34 struct optimize_ctx *ctx; 35 int8_t *skip; 36}; 37 38void vp9_subtract_block_c(int rows, int cols, 39 int16_t *diff, ptrdiff_t diff_stride, 40 const uint8_t *src, ptrdiff_t src_stride, 41 const uint8_t *pred, ptrdiff_t pred_stride) { 42 int r, c; 43 44 for (r = 0; r < rows; r++) { 45 for (c = 0; c < cols; c++) 46 diff[c] = src[c] - pred[c]; 47 48 diff += diff_stride; 49 pred += pred_stride; 50 src += src_stride; 51 } 52} 53 54void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { 55 struct macroblock_plane *const p = &x->plane[plane]; 56 const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane]; 57 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); 58 const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; 59 const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; 60 61 vp9_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, 62 pd->dst.buf, pd->dst.stride); 63} 64 65#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF) 66 67typedef struct vp9_token_state { 68 int rate; 69 int error; 70 int next; 71 signed char token; 72 short qc; 73} vp9_token_state; 74 75// TODO(jimbankoski): experiment to find optimal RD numbers. 76static const int plane_rd_mult[PLANE_TYPES] = { 4, 2 }; 77 78#define UPDATE_RD_COST()\ 79{\ 80 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\ 81 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\ 82 if (rd_cost0 == rd_cost1) {\ 83 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\ 84 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\ 85 }\ 86} 87 88// This function is a place holder for now but may ultimately need 89// to scan previous tokens to work out the correct context. 90static int trellis_get_coeff_context(const int16_t *scan, 91 const int16_t *nb, 92 int idx, int token, 93 uint8_t *token_cache) { 94 int bak = token_cache[scan[idx]], pt; 95 token_cache[scan[idx]] = vp9_pt_energy_class[token]; 96 pt = get_coef_context(nb, token_cache, idx + 1); 97 token_cache[scan[idx]] = bak; 98 return pt; 99} 100 101static int optimize_b(MACROBLOCK *mb, int plane, int block, 102 TX_SIZE tx_size, int ctx) { 103 MACROBLOCKD *const xd = &mb->e_mbd; 104 struct macroblock_plane *const p = &mb->plane[plane]; 105 struct macroblockd_plane *const pd = &xd->plane[plane]; 106 const int ref = is_inter_block(&xd->mi[0].src_mi->mbmi); 107 vp9_token_state tokens[1025][2]; 108 unsigned best_index[1025][2]; 109 uint8_t token_cache[1024]; 110 const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block); 111 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); 112 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 113 const int eob = p->eobs[block]; 114 const PLANE_TYPE type = pd->plane_type; 115 const int default_eob = 16 << (tx_size << 1); 116 const int mul = 1 + (tx_size == TX_32X32); 117 const int16_t *dequant_ptr = pd->dequant; 118 const uint8_t *const band_translate = get_band_translate(tx_size); 119 const scan_order *const so = get_scan(xd, tx_size, type, block); 120 const int16_t *const scan = so->scan; 121 const int16_t *const nb = so->neighbors; 122 int next = eob, sz = 0; 123 int64_t rdmult = mb->rdmult * plane_rd_mult[type], rddiv = mb->rddiv; 124 int64_t rd_cost0, rd_cost1; 125 int rate0, rate1, error0, error1, t0, t1; 126 int best, band, pt, i, final_eob; 127 128 assert((!type && !plane) || (type && plane)); 129 assert(eob <= default_eob); 130 131 /* Now set up a Viterbi trellis to evaluate alternative roundings. */ 132 if (!ref) 133 rdmult = (rdmult * 9) >> 4; 134 135 /* Initialize the sentinel node of the trellis. */ 136 tokens[eob][0].rate = 0; 137 tokens[eob][0].error = 0; 138 tokens[eob][0].next = default_eob; 139 tokens[eob][0].token = EOB_TOKEN; 140 tokens[eob][0].qc = 0; 141 tokens[eob][1] = tokens[eob][0]; 142 143 for (i = 0; i < eob; i++) 144 token_cache[scan[i]] = 145 vp9_pt_energy_class[vp9_dct_value_tokens_ptr[qcoeff[scan[i]]].token]; 146 147 for (i = eob; i-- > 0;) { 148 int base_bits, d2, dx; 149 const int rc = scan[i]; 150 int x = qcoeff[rc]; 151 /* Only add a trellis state for non-zero coefficients. */ 152 if (x) { 153 int shortcut = 0; 154 error0 = tokens[next][0].error; 155 error1 = tokens[next][1].error; 156 /* Evaluate the first possibility for this state. */ 157 rate0 = tokens[next][0].rate; 158 rate1 = tokens[next][1].rate; 159 t0 = (vp9_dct_value_tokens_ptr + x)->token; 160 /* Consider both possible successor states. */ 161 if (next < default_eob) { 162 band = band_translate[i + 1]; 163 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); 164 rate0 += mb->token_costs[tx_size][type][ref][band][0][pt] 165 [tokens[next][0].token]; 166 rate1 += mb->token_costs[tx_size][type][ref][band][0][pt] 167 [tokens[next][1].token]; 168 } 169 UPDATE_RD_COST(); 170 /* And pick the best. */ 171 best = rd_cost1 < rd_cost0; 172 base_bits = vp9_dct_value_cost_ptr[x]; 173 dx = mul * (dqcoeff[rc] - coeff[rc]); 174 d2 = dx * dx; 175 tokens[i][0].rate = base_bits + (best ? rate1 : rate0); 176 tokens[i][0].error = d2 + (best ? error1 : error0); 177 tokens[i][0].next = next; 178 tokens[i][0].token = t0; 179 tokens[i][0].qc = x; 180 best_index[i][0] = best; 181 182 /* Evaluate the second possibility for this state. */ 183 rate0 = tokens[next][0].rate; 184 rate1 = tokens[next][1].rate; 185 186 if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) && 187 (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul + 188 dequant_ptr[rc != 0])) 189 shortcut = 1; 190 else 191 shortcut = 0; 192 193 if (shortcut) { 194 sz = -(x < 0); 195 x -= 2 * sz + 1; 196 } 197 198 /* Consider both possible successor states. */ 199 if (!x) { 200 /* If we reduced this coefficient to zero, check to see if 201 * we need to move the EOB back here. 202 */ 203 t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; 204 t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; 205 } else { 206 t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token; 207 } 208 if (next < default_eob) { 209 band = band_translate[i + 1]; 210 if (t0 != EOB_TOKEN) { 211 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); 212 rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt] 213 [tokens[next][0].token]; 214 } 215 if (t1 != EOB_TOKEN) { 216 pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache); 217 rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt] 218 [tokens[next][1].token]; 219 } 220 } 221 222 UPDATE_RD_COST(); 223 /* And pick the best. */ 224 best = rd_cost1 < rd_cost0; 225 base_bits = vp9_dct_value_cost_ptr[x]; 226 227 if (shortcut) { 228 dx -= (dequant_ptr[rc != 0] + sz) ^ sz; 229 d2 = dx * dx; 230 } 231 tokens[i][1].rate = base_bits + (best ? rate1 : rate0); 232 tokens[i][1].error = d2 + (best ? error1 : error0); 233 tokens[i][1].next = next; 234 tokens[i][1].token = best ? t1 : t0; 235 tokens[i][1].qc = x; 236 best_index[i][1] = best; 237 /* Finally, make this the new head of the trellis. */ 238 next = i; 239 } else { 240 /* There's no choice to make for a zero coefficient, so we don't 241 * add a new trellis node, but we do need to update the costs. 242 */ 243 band = band_translate[i + 1]; 244 t0 = tokens[next][0].token; 245 t1 = tokens[next][1].token; 246 /* Update the cost of each path if we're past the EOB token. */ 247 if (t0 != EOB_TOKEN) { 248 tokens[next][0].rate += 249 mb->token_costs[tx_size][type][ref][band][1][0][t0]; 250 tokens[next][0].token = ZERO_TOKEN; 251 } 252 if (t1 != EOB_TOKEN) { 253 tokens[next][1].rate += 254 mb->token_costs[tx_size][type][ref][band][1][0][t1]; 255 tokens[next][1].token = ZERO_TOKEN; 256 } 257 best_index[i][0] = best_index[i][1] = 0; 258 /* Don't update next, because we didn't add a new node. */ 259 } 260 } 261 262 /* Now pick the best path through the whole trellis. */ 263 band = band_translate[i + 1]; 264 rate0 = tokens[next][0].rate; 265 rate1 = tokens[next][1].rate; 266 error0 = tokens[next][0].error; 267 error1 = tokens[next][1].error; 268 t0 = tokens[next][0].token; 269 t1 = tokens[next][1].token; 270 rate0 += mb->token_costs[tx_size][type][ref][band][0][ctx][t0]; 271 rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1]; 272 UPDATE_RD_COST(); 273 best = rd_cost1 < rd_cost0; 274 final_eob = -1; 275 vpx_memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2))); 276 vpx_memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2))); 277 for (i = next; i < eob; i = next) { 278 const int x = tokens[i][best].qc; 279 const int rc = scan[i]; 280 if (x) { 281 final_eob = i; 282 } 283 284 qcoeff[rc] = x; 285 dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul; 286 287 next = tokens[i][best].next; 288 best = best_index[i][best]; 289 } 290 final_eob++; 291 292 mb->plane[plane].eobs[block] = final_eob; 293 return final_eob; 294} 295 296static INLINE void fdct32x32(int rd_transform, 297 const int16_t *src, tran_low_t *dst, 298 int src_stride) { 299 if (rd_transform) 300 vp9_fdct32x32_rd(src, dst, src_stride); 301 else 302 vp9_fdct32x32(src, dst, src_stride); 303} 304 305#if CONFIG_VP9_HIGHBITDEPTH 306static INLINE void high_fdct32x32(int rd_transform, const int16_t *src, 307 tran_low_t *dst, int src_stride) { 308 if (rd_transform) 309 vp9_high_fdct32x32_rd(src, dst, src_stride); 310 else 311 vp9_high_fdct32x32(src, dst, src_stride); 312} 313#endif 314 315void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, 316 BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { 317 MACROBLOCKD *const xd = &x->e_mbd; 318 const struct macroblock_plane *const p = &x->plane[plane]; 319 const struct macroblockd_plane *const pd = &xd->plane[plane]; 320 const scan_order *const scan_order = &vp9_default_scan_orders[tx_size]; 321 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); 322 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); 323 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 324 uint16_t *const eob = &p->eobs[block]; 325 const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; 326 int i, j; 327 const int16_t *src_diff; 328 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); 329 src_diff = &p->src_diff[4 * (j * diff_stride + i)]; 330 331 switch (tx_size) { 332 case TX_32X32: 333 fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); 334 vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp, 335 p->quant_fp, p->quant_shift, qcoeff, dqcoeff, 336 pd->dequant, p->zbin_extra, eob, scan_order->scan, 337 scan_order->iscan); 338 break; 339 case TX_16X16: 340 vp9_fdct16x16(src_diff, coeff, diff_stride); 341 vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, 342 p->quant_fp, p->quant_shift, qcoeff, dqcoeff, 343 pd->dequant, p->zbin_extra, eob, 344 scan_order->scan, scan_order->iscan); 345 break; 346 case TX_8X8: 347 vp9_fdct8x8(src_diff, coeff, diff_stride); 348 vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp, 349 p->quant_fp, p->quant_shift, qcoeff, dqcoeff, 350 pd->dequant, p->zbin_extra, eob, 351 scan_order->scan, scan_order->iscan); 352 break; 353 case TX_4X4: 354 x->fwd_txm4x4(src_diff, coeff, diff_stride); 355 vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, 356 p->quant_fp, p->quant_shift, qcoeff, dqcoeff, 357 pd->dequant, p->zbin_extra, eob, 358 scan_order->scan, scan_order->iscan); 359 break; 360 default: 361 assert(0); 362 break; 363 } 364} 365 366void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, 367 BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { 368 MACROBLOCKD *const xd = &x->e_mbd; 369 const struct macroblock_plane *const p = &x->plane[plane]; 370 const struct macroblockd_plane *const pd = &xd->plane[plane]; 371 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); 372 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); 373 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 374 uint16_t *const eob = &p->eobs[block]; 375 const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; 376 int i, j; 377 const int16_t *src_diff; 378 379 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); 380 src_diff = &p->src_diff[4 * (j * diff_stride + i)]; 381 382 switch (tx_size) { 383 case TX_32X32: 384 vp9_fdct32x32_1(src_diff, coeff, diff_stride); 385 vp9_quantize_dc_32x32(coeff, x->skip_block, p->round, 386 p->quant_fp[0], qcoeff, dqcoeff, 387 pd->dequant[0], eob); 388 break; 389 case TX_16X16: 390 vp9_fdct16x16_1(src_diff, coeff, diff_stride); 391 vp9_quantize_dc(coeff, x->skip_block, p->round, 392 p->quant_fp[0], qcoeff, dqcoeff, 393 pd->dequant[0], eob); 394 break; 395 case TX_8X8: 396 vp9_fdct8x8_1(src_diff, coeff, diff_stride); 397 vp9_quantize_dc(coeff, x->skip_block, p->round, 398 p->quant_fp[0], qcoeff, dqcoeff, 399 pd->dequant[0], eob); 400 break; 401 case TX_4X4: 402 x->fwd_txm4x4(src_diff, coeff, diff_stride); 403 vp9_quantize_dc(coeff, x->skip_block, p->round, 404 p->quant_fp[0], qcoeff, dqcoeff, 405 pd->dequant[0], eob); 406 break; 407 default: 408 assert(0); 409 break; 410 } 411} 412 413void vp9_xform_quant(MACROBLOCK *x, int plane, int block, 414 BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { 415 MACROBLOCKD *const xd = &x->e_mbd; 416 const struct macroblock_plane *const p = &x->plane[plane]; 417 const struct macroblockd_plane *const pd = &xd->plane[plane]; 418 const scan_order *const scan_order = &vp9_default_scan_orders[tx_size]; 419 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); 420 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); 421 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 422 uint16_t *const eob = &p->eobs[block]; 423 const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; 424 int i, j; 425 const int16_t *src_diff; 426 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); 427 src_diff = &p->src_diff[4 * (j * diff_stride + i)]; 428 429 switch (tx_size) { 430 case TX_32X32: 431 fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); 432 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, 433 p->quant, p->quant_shift, qcoeff, dqcoeff, 434 pd->dequant, p->zbin_extra, eob, scan_order->scan, 435 scan_order->iscan); 436 break; 437 case TX_16X16: 438 vp9_fdct16x16(src_diff, coeff, diff_stride); 439 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, 440 p->quant, p->quant_shift, qcoeff, dqcoeff, 441 pd->dequant, p->zbin_extra, eob, 442 scan_order->scan, scan_order->iscan); 443 break; 444 case TX_8X8: 445 vp9_fdct8x8(src_diff, coeff, diff_stride); 446 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, 447 p->quant, p->quant_shift, qcoeff, dqcoeff, 448 pd->dequant, p->zbin_extra, eob, 449 scan_order->scan, scan_order->iscan); 450 break; 451 case TX_4X4: 452 x->fwd_txm4x4(src_diff, coeff, diff_stride); 453 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, 454 p->quant, p->quant_shift, qcoeff, dqcoeff, 455 pd->dequant, p->zbin_extra, eob, 456 scan_order->scan, scan_order->iscan); 457 break; 458 default: 459 assert(0); 460 break; 461 } 462} 463 464static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, 465 TX_SIZE tx_size, void *arg) { 466 struct encode_b_args *const args = arg; 467 MACROBLOCK *const x = args->x; 468 MACROBLOCKD *const xd = &x->e_mbd; 469 struct optimize_ctx *const ctx = args->ctx; 470 struct macroblock_plane *const p = &x->plane[plane]; 471 struct macroblockd_plane *const pd = &xd->plane[plane]; 472 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 473 int i, j; 474 uint8_t *dst; 475 ENTROPY_CONTEXT *a, *l; 476 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); 477 dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; 478 a = &ctx->ta[plane][i]; 479 l = &ctx->tl[plane][j]; 480 481 // TODO(jingning): per transformed block zero forcing only enabled for 482 // luma component. will integrate chroma components as well. 483 if (x->zcoeff_blk[tx_size][block] && plane == 0) { 484 p->eobs[block] = 0; 485 *a = *l = 0; 486 return; 487 } 488 489 if (!x->skip_recode) { 490 if (max_txsize_lookup[plane_bsize] == tx_size) { 491 if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) { 492 // full forward transform and quantization 493 if (x->quant_fp) 494 vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size); 495 else 496 vp9_xform_quant(x, plane, block, plane_bsize, tx_size); 497 } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) { 498 // fast path forward transform and quantization 499 vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size); 500 } else { 501 // skip forward transform 502 p->eobs[block] = 0; 503 *a = *l = 0; 504 return; 505 } 506 } else { 507 vp9_xform_quant(x, plane, block, plane_bsize, tx_size); 508 } 509 } 510 511 if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { 512 const int ctx = combine_entropy_contexts(*a, *l); 513 *a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0; 514 } else { 515 *a = *l = p->eobs[block] > 0; 516 } 517 518 if (p->eobs[block]) 519 *(args->skip) = 0; 520 521 if (x->skip_encode || p->eobs[block] == 0) 522 return; 523 524 switch (tx_size) { 525 case TX_32X32: 526 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); 527 break; 528 case TX_16X16: 529 vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); 530 break; 531 case TX_8X8: 532 vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); 533 break; 534 case TX_4X4: 535 // this is like vp9_short_idct4x4 but has a special case around eob<=1 536 // which is significant (not just an optimization) for the lossless 537 // case. 538 x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); 539 break; 540 default: 541 assert(0 && "Invalid transform size"); 542 break; 543 } 544} 545 546static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, 547 TX_SIZE tx_size, void *arg) { 548 MACROBLOCK *const x = (MACROBLOCK *)arg; 549 MACROBLOCKD *const xd = &x->e_mbd; 550 struct macroblock_plane *const p = &x->plane[plane]; 551 struct macroblockd_plane *const pd = &xd->plane[plane]; 552 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 553 int i, j; 554 uint8_t *dst; 555 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); 556 dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; 557 558 vp9_xform_quant(x, plane, block, plane_bsize, tx_size); 559 560 if (p->eobs[block] > 0) 561 x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); 562} 563 564void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { 565 vp9_subtract_plane(x, bsize, 0); 566 vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0, 567 encode_block_pass1, x); 568} 569 570void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { 571 MACROBLOCKD *const xd = &x->e_mbd; 572 struct optimize_ctx ctx; 573 MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; 574 struct encode_b_args arg = {x, &ctx, &mbmi->skip}; 575 int plane; 576 577 mbmi->skip = 1; 578 579 if (x->skip) 580 return; 581 582 for (plane = 0; plane < MAX_MB_PLANE; ++plane) { 583 if (!x->skip_recode) 584 vp9_subtract_plane(x, bsize, plane); 585 586 if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { 587 const struct macroblockd_plane* const pd = &xd->plane[plane]; 588 const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size; 589 vp9_get_entropy_contexts(bsize, tx_size, pd, 590 ctx.ta[plane], ctx.tl[plane]); 591 } 592 593 vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block, 594 &arg); 595 } 596} 597 598static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, 599 TX_SIZE tx_size, void *arg) { 600 struct encode_b_args* const args = arg; 601 MACROBLOCK *const x = args->x; 602 MACROBLOCKD *const xd = &x->e_mbd; 603 MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; 604 struct macroblock_plane *const p = &x->plane[plane]; 605 struct macroblockd_plane *const pd = &xd->plane[plane]; 606 tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block); 607 tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); 608 tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 609 const scan_order *scan_order; 610 TX_TYPE tx_type; 611 PREDICTION_MODE mode; 612 const int bwl = b_width_log2(plane_bsize); 613 const int diff_stride = 4 * (1 << bwl); 614 uint8_t *src, *dst; 615 int16_t *src_diff; 616 uint16_t *eob = &p->eobs[block]; 617 const int src_stride = p->src.stride; 618 const int dst_stride = pd->dst.stride; 619 int i, j; 620 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); 621 dst = &pd->dst.buf[4 * (j * dst_stride + i)]; 622 src = &p->src.buf[4 * (j * src_stride + i)]; 623 src_diff = &p->src_diff[4 * (j * diff_stride + i)]; 624 625 switch (tx_size) { 626 case TX_32X32: 627 scan_order = &vp9_default_scan_orders[TX_32X32]; 628 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; 629 vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode, 630 x->skip_encode ? src : dst, 631 x->skip_encode ? src_stride : dst_stride, 632 dst, dst_stride, i, j, plane); 633 if (!x->skip_recode) { 634 vp9_subtract_block(32, 32, src_diff, diff_stride, 635 src, src_stride, dst, dst_stride); 636 fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); 637 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, 638 p->quant, p->quant_shift, qcoeff, dqcoeff, 639 pd->dequant, p->zbin_extra, eob, scan_order->scan, 640 scan_order->iscan); 641 } 642 if (!x->skip_encode && *eob) 643 vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob); 644 break; 645 case TX_16X16: 646 tx_type = get_tx_type(pd->plane_type, xd); 647 scan_order = &vp9_scan_orders[TX_16X16][tx_type]; 648 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; 649 vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode, 650 x->skip_encode ? src : dst, 651 x->skip_encode ? src_stride : dst_stride, 652 dst, dst_stride, i, j, plane); 653 if (!x->skip_recode) { 654 vp9_subtract_block(16, 16, src_diff, diff_stride, 655 src, src_stride, dst, dst_stride); 656 vp9_fht16x16(src_diff, coeff, diff_stride, tx_type); 657 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, 658 p->quant, p->quant_shift, qcoeff, dqcoeff, 659 pd->dequant, p->zbin_extra, eob, scan_order->scan, 660 scan_order->iscan); 661 } 662 if (!x->skip_encode && *eob) 663 vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob); 664 break; 665 case TX_8X8: 666 tx_type = get_tx_type(pd->plane_type, xd); 667 scan_order = &vp9_scan_orders[TX_8X8][tx_type]; 668 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; 669 vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode, 670 x->skip_encode ? src : dst, 671 x->skip_encode ? src_stride : dst_stride, 672 dst, dst_stride, i, j, plane); 673 if (!x->skip_recode) { 674 vp9_subtract_block(8, 8, src_diff, diff_stride, 675 src, src_stride, dst, dst_stride); 676 vp9_fht8x8(src_diff, coeff, diff_stride, tx_type); 677 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, 678 p->quant_shift, qcoeff, dqcoeff, 679 pd->dequant, p->zbin_extra, eob, scan_order->scan, 680 scan_order->iscan); 681 } 682 if (!x->skip_encode && *eob) 683 vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob); 684 break; 685 case TX_4X4: 686 tx_type = get_tx_type_4x4(pd->plane_type, xd, block); 687 scan_order = &vp9_scan_orders[TX_4X4][tx_type]; 688 mode = plane == 0 ? get_y_mode(xd->mi[0].src_mi, block) : mbmi->uv_mode; 689 vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, 690 x->skip_encode ? src : dst, 691 x->skip_encode ? src_stride : dst_stride, 692 dst, dst_stride, i, j, plane); 693 694 if (!x->skip_recode) { 695 vp9_subtract_block(4, 4, src_diff, diff_stride, 696 src, src_stride, dst, dst_stride); 697 if (tx_type != DCT_DCT) 698 vp9_fht4x4(src_diff, coeff, diff_stride, tx_type); 699 else 700 x->fwd_txm4x4(src_diff, coeff, diff_stride); 701 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, 702 p->quant_shift, qcoeff, dqcoeff, 703 pd->dequant, p->zbin_extra, eob, scan_order->scan, 704 scan_order->iscan); 705 } 706 707 if (!x->skip_encode && *eob) { 708 if (tx_type == DCT_DCT) 709 // this is like vp9_short_idct4x4 but has a special case around eob<=1 710 // which is significant (not just an optimization) for the lossless 711 // case. 712 x->itxm_add(dqcoeff, dst, dst_stride, *eob); 713 else 714 vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type); 715 } 716 break; 717 default: 718 assert(0); 719 break; 720 } 721 if (*eob) 722 *(args->skip) = 0; 723} 724 725void vp9_encode_block_intra(MACROBLOCK *x, int plane, int block, 726 BLOCK_SIZE plane_bsize, TX_SIZE tx_size, 727 int8_t *skip) { 728 struct encode_b_args arg = {x, NULL, skip}; 729 encode_block_intra(plane, block, plane_bsize, tx_size, &arg); 730} 731 732 733void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { 734 const MACROBLOCKD *const xd = &x->e_mbd; 735 struct encode_b_args arg = {x, NULL, &xd->mi[0].src_mi->mbmi.skip}; 736 737 vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_intra, 738 &arg); 739} 740