/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11
12#include "./vp9_rtcd.h"
13#include "./vpx_config.h"
14
15#include "vpx_mem/vpx_mem.h"
16
17#include "vp9/common/vp9_idct.h"
18#include "vp9/common/vp9_reconinter.h"
19#include "vp9/common/vp9_reconintra.h"
20#include "vp9/common/vp9_systemdependent.h"
21
22#include "vp9/encoder/vp9_encodemb.h"
23#include "vp9/encoder/vp9_quantize.h"
24#include "vp9/encoder/vp9_rdopt.h"
25#include "vp9/encoder/vp9_tokenize.h"
26
// Scratch entropy contexts used while encoding the transform blocks of one
// block. vp9_encode_sb() fills ta[plane] / tl[plane] through
// vp9_get_entropy_contexts(); encode_block() then addresses them as
// ta[plane][i] and tl[plane][j], where (i, j) is the transform block's
// position as returned by txfrm_block_to_raster_xy().
struct optimize_ctx {
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];  // indexed by horizontal position i
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];  // indexed by vertical position j
};
31
// Argument bundle handed (as void *) to the per-transform-block callbacks
// encode_block() and encode_block_intra(). The struct is initialised
// positionally elsewhere in this file, so the field order must not change.
struct encode_b_args {
  MACROBLOCK *x;             // macroblock being encoded
  struct optimize_ctx *ctx;  // entropy contexts; NULL on the intra paths,
                             // which never dereference it
  unsigned char *skip;       // set to 0 by the callbacks as soon as a
                             // transform block ends up with a non-zero eob
};
37
// Writes the residual diff = src - pred for a rows x cols region.
// Each buffer is addressed with its own stride, given in elements; only the
// first `cols` entries of every destination row are written.
void vp9_subtract_block_c(int rows, int cols,
                          int16_t *diff, ptrdiff_t diff_stride,
                          const uint8_t *src, ptrdiff_t src_stride,
                          const uint8_t *pred, ptrdiff_t pred_stride) {
  int row;

  for (row = 0; row < rows; ++row) {
    int16_t *const diff_row = diff + row * diff_stride;
    const uint8_t *const src_row = src + row * src_stride;
    const uint8_t *const pred_row = pred + row * pred_stride;
    int col;

    for (col = 0; col < cols; ++col)
      diff_row[col] = src_row[col] - pred_row[col];
  }
}
53
54void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
55  struct macroblock_plane *const p = &x->plane[plane];
56  const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
57  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
58  const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
59  const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
60
61  vp9_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
62                     pd->dst.buf, pd->dst.stride);
63}
64
// Low 8 bits of (R * RM + 128). DM and D are accepted so the argument list
// matches RDCOST, but they are not used. UPDATE_RD_COST() uses this value to
// break ties between two equal RDCOST results.
#define RDTRUNC(RM, DM, R, D) (((R) * (RM) + 128) & 0xFF)
typedef struct vp9_token_state vp9_token_state;

// One node of the trellis built by optimize_b(). Each scan position that
// holds a non-zero coefficient gets two of these (one per candidate value).
struct vp9_token_state {
  int           rate;   // accumulated rate from this node to the end
  int           error;  // accumulated squared error from this node to the end
  int           next;   // scan index of the following trellis node
  signed char   token;  // token chosen for this node
  short         qc;     // quantized coefficient value for this candidate
};
75
// TODO(jimbankoski): experiment to find optimal RD numbers.
#define Y1_RD_MULT 4
#define UV_RD_MULT 2

// Per-plane-type scale applied to the RD multiplier in optimize_b(), which
// indexes this table with pd->plane_type. Only the first two entries are
// given values; the array is declared with four, so entries 2 and 3 are
// zero-initialised.
// NOTE(review): the size of 4 looks like a leftover — confirm against the
// number of PLANE_TYPE values.
static const int plane_rd_mult[4] = {
  Y1_RD_MULT,
  UV_RD_MULT,
};
84
// Recomputes rd_cost0 / rd_cost1 from the rate0/error0 and rate1/error1
// locals of the enclosing scope, using that scope's rdmult and rddiv.
// When the two RDCOST values tie, both are replaced by their RDTRUNC values
// (the low 8 bits of 128 + rate * rdmult, as defined in this file) so that
// the comparison which follows is still able to prefer one of them.
//
// The body is wrapped in do { } while (0) so that "UPDATE_RD_COST();" is
// exactly one statement and remains valid as the unbraced body of an
// if/else.
#define UPDATE_RD_COST()\
do {\
  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
  if (rd_cost0 == rd_cost1) {\
    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
  }\
} while (0)
94
95// This function is a place holder for now but may ultimately need
96// to scan previous tokens to work out the correct context.
97static int trellis_get_coeff_context(const int16_t *scan,
98                                     const int16_t *nb,
99                                     int idx, int token,
100                                     uint8_t *token_cache) {
101  int bak = token_cache[scan[idx]], pt;
102  token_cache[scan[idx]] = vp9_pt_energy_class[token];
103  pt = get_coef_context(nb, token_cache, idx + 1);
104  token_cache[scan[idx]] = bak;
105  return pt;
106}
107
// Trellis (Viterbi) optimisation of the quantized coefficients of one
// transform block.
//
// For every non-zero quantized coefficient, walking the scan backwards from
// the end of block, two candidates are kept: the value as quantized, and
// (when the dequantized magnitude overshoots the original coefficient by
// less than one quantizer step) the value moved one step towards zero.
// Each candidate records the cheaper of the two successor candidates by
// rate-distortion cost. The best path is then written back.
//
// plane, block  - select the coefficient buffers and the eob entry.
// plane_bsize   - not used by this function.
// tx_size       - transform size; determines the number of coefficients.
// mb            - macroblock; supplies rdmult/rddiv and token_costs.
// a, l          - entropy contexts; read to cost the first token and both
//                 overwritten with (final eob > 0) on exit.
//
// Side effects: rewrites qcoeff and dqcoeff for the block and updates
// mb->plane[plane].eobs[block].
static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
                       TX_SIZE tx_size, MACROBLOCK *mb,
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *p = &mb->plane[plane];
  struct macroblockd_plane *pd = &xd->plane[plane];
  const int ref = is_inter_block(&xd->mi[0]->mbmi);
  /* Two candidate states per scan position, plus one slot for the sentinel
   * that is stored at index eob. */
  vp9_token_state tokens[1025][2];
  /* best_index[i][s]: which successor state candidate s at position i chose. */
  unsigned best_index[1025][2];
  const int16_t *coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
  int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  int eob = p->eobs[block], final_eob, sz = 0;
  const int i0 = 0;
  int rc, x, next, i;
  int64_t rdmult, rddiv, rd_cost0, rd_cost1;
  int rate0, rate1, error0, error1, t0, t1;
  int best, band, pt;
  PLANE_TYPE type = pd->plane_type;
  int err_mult = plane_rd_mult[type];
  /* Number of coefficients in the transform block: 16, 64, 256, ... */
  const int default_eob = 16 << (tx_size << 1);
  /* 2 for 32x32 transforms, 1 otherwise; scales coefficient differences and
   * divides the dequantized value written back at the end. */
  const int mul = 1 + (tx_size == TX_32X32);
  uint8_t token_cache[1024];
  const int16_t *dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  const scan_order *so = get_scan(xd, tx_size, type, block);
  const int16_t *scan = so->scan;
  const int16_t *nb = so->neighbors;

  assert((!type && !plane) || (type && plane));
  assert(eob <= default_eob);

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  rdmult = mb->rdmult * err_mult;
  /* Blocks that are not inter-coded use a reduced multiplier (9/16). */
  if (!is_inter_block(&mb->e_mbd.mi[0]->mbmi))
    rdmult = (rdmult * 9) >> 4;
  rddiv = mb->rddiv;
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = default_eob;
  tokens[eob][0].token = EOB_TOKEN;
  tokens[eob][0].qc = 0;
  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  next = eob;
  /* Seed the token cache with the energy class of every coefficient as
   * currently quantized; trellis_get_coeff_context() reads it. */
  for (i = 0; i < eob; i++)
    token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[
        qcoeff[scan[i]]].token];

  /* Walk the scan backwards from eob - 1 down to i0; i is i0 - 1 on exit. */
  for (i = eob; i-- > i0;) {
    int base_bits, d2, dx;

    rc = scan[i];
    x = qcoeff[rc];
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
      t0 = (vp9_dct_value_tokens_ptr + x)->token;
      /* Consider both possible successor states. */
      if (next < default_eob) {
        /* Add the cost of coding each successor's token in the context that
         * results from coding t0 at this position. */
        band = band_translate[i + 1];
        pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
        rate0 +=
          mb->token_costs[tx_size][type][ref][band][0][pt]
                         [tokens[next][0].token];
        rate1 +=
          mb->token_costs[tx_size][type][ref][band][0][pt]
                         [tokens[next][1].token];
      }
      UPDATE_RD_COST();
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp9_dct_value_cost_ptr + x);
      /* Reconstruction error of the unmodified value. */
      dx = mul * (dqcoeff[rc] - coeff[rc]);
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
      best_index[i][0] = best;

      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

      /* The alternative value is only tried when the dequantized magnitude
       * is strictly above the (scaled) original magnitude, but by less than
       * one quantizer step. dequant_ptr[0] is used for raster position 0,
       * dequant_ptr[1] for every other position. */
      if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
          (abs(x)*dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
                                         dequant_ptr[rc != 0]))
        shortcut = 1;
      else
        shortcut = 0;

      if (shortcut) {
        /* sz is -1 for negative x and 0 otherwise, so this moves x one
         * step towards zero. */
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
         */
        t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
      } else {
        t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token;
      }
      if (next < default_eob) {
        band = band_translate[i + 1];
        if (t0 != EOB_TOKEN) {
          pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
          rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
                                  [tokens[next][0].token];
        }
        if (t1 != EOB_TOKEN) {
          pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
          rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
                                  [tokens[next][1].token];
        }
      }

      UPDATE_RD_COST();
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp9_dct_value_cost_ptr + x);

      if (shortcut) {
        /* (d + sz) ^ sz is d when sz == 0 and -d when sz == -1, so the
         * error term moves by one quantizer step in the direction x moved. */
        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
        d2 = dx * dx;
      }
      /* Without the shortcut, x, dx and d2 are unchanged, so this candidate
       * carries the same value and error as candidate 0. */
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
      best_index[i][1] = best;
      /* Finally, make this the new head of the trellis. */
      next = i;
    } else {
      /* There's no choice to make for a zero coefficient, so we don't
       *  add a new trellis node, but we do need to update the costs.
       */
      band = band_translate[i + 1];
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != EOB_TOKEN) {
        tokens[next][0].rate +=
            mb->token_costs[tx_size][type][ref][band][1][0][t0];
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != EOB_TOKEN) {
        tokens[next][1].rate +=
            mb->token_costs[tx_size][type][ref][band][1][0][t1];
        tokens[next][1].token = ZERO_TOKEN;
      }
      best_index[i][0] = best_index[i][1] = 0;
      /* Don't update next, because we didn't add a new node. */
    }
  }

  /* Now pick the best path through the whole trellis. */
  /* i + 1 == i0 here, so this is the band of the first scan position; the
   * first token is costed in the context derived from *a and *l. */
  band = band_translate[i + 1];
  pt = combine_entropy_contexts(*a, *l);
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
  rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0];
  rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1];
  UPDATE_RD_COST();
  best = rd_cost1 < rd_cost0;
  final_eob = i0 - 1;
  /* Clear the whole block, then write back only the nodes on the chosen
   * path (positions that never got a node stay zero). */
  vpx_memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2)));
  vpx_memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2)));
  for (i = next; i < eob; i = next) {
    x = tokens[i][best].qc;
    if (x) {
      /* Track the last scan position that still holds a non-zero value. */
      final_eob = i;
    }
    rc = scan[i];
    qcoeff[rc] = x;
    dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;

    next = tokens[i][best].next;
    best = best_index[i][best];
  }
  /* Convert "index of last non-zero" into a count. */
  final_eob++;

  mb->plane[plane].eobs[block] = final_eob;
  *a = *l = (final_eob > 0);
}
308
309static INLINE void fdct32x32(int rd_transform,
310                             const int16_t *src, int16_t *dst, int src_stride) {
311  if (rd_transform)
312    vp9_fdct32x32_rd(src, dst, src_stride);
313  else
314    vp9_fdct32x32(src, dst, src_stride);
315}
316
317void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
318                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
319  MACROBLOCKD *const xd = &x->e_mbd;
320  const struct macroblock_plane *const p = &x->plane[plane];
321  const struct macroblockd_plane *const pd = &xd->plane[plane];
322  const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
323  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
324  int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
325  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
326  uint16_t *const eob = &p->eobs[block];
327  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
328  int i, j;
329  const int16_t *src_diff;
330  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
331  src_diff = &p->src_diff[4 * (j * diff_stride + i)];
332
333  switch (tx_size) {
334    case TX_32X32:
335      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
336      vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
337                           p->quant, p->quant_shift, qcoeff, dqcoeff,
338                           pd->dequant, p->zbin_extra, eob, scan_order->scan,
339                           scan_order->iscan);
340      break;
341    case TX_16X16:
342      vp9_fdct16x16(src_diff, coeff, diff_stride);
343      vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
344                     p->quant, p->quant_shift, qcoeff, dqcoeff,
345                     pd->dequant, p->zbin_extra, eob,
346                     scan_order->scan, scan_order->iscan);
347      break;
348    case TX_8X8:
349      vp9_fdct8x8(src_diff, coeff, diff_stride);
350      vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
351                     p->quant, p->quant_shift, qcoeff, dqcoeff,
352                     pd->dequant, p->zbin_extra, eob,
353                     scan_order->scan, scan_order->iscan);
354      break;
355    case TX_4X4:
356      x->fwd_txm4x4(src_diff, coeff, diff_stride);
357      vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
358                     p->quant, p->quant_shift, qcoeff, dqcoeff,
359                     pd->dequant, p->zbin_extra, eob,
360                     scan_order->scan, scan_order->iscan);
361      break;
362    default:
363      assert(0);
364  }
365}
366
367static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
368                         TX_SIZE tx_size, void *arg) {
369  struct encode_b_args *const args = arg;
370  MACROBLOCK *const x = args->x;
371  MACROBLOCKD *const xd = &x->e_mbd;
372  struct optimize_ctx *const ctx = args->ctx;
373  struct macroblock_plane *const p = &x->plane[plane];
374  struct macroblockd_plane *const pd = &xd->plane[plane];
375  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
376  int i, j;
377  uint8_t *dst;
378  ENTROPY_CONTEXT *a, *l;
379  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
380  dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
381  a = &ctx->ta[plane][i];
382  l = &ctx->tl[plane][j];
383
384  // TODO(jingning): per transformed block zero forcing only enabled for
385  // luma component. will integrate chroma components as well.
386  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
387    p->eobs[block] = 0;
388    *a = *l = 0;
389    return;
390  }
391
392  if (!x->skip_recode)
393    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
394
395  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
396    optimize_b(plane, block, plane_bsize, tx_size, x, a, l);
397  } else {
398    *a = *l = p->eobs[block] > 0;
399  }
400
401  if (p->eobs[block])
402    *(args->skip) = 0;
403
404  if (x->skip_encode || p->eobs[block] == 0)
405    return;
406
407  switch (tx_size) {
408    case TX_32X32:
409      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
410      break;
411    case TX_16X16:
412      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
413      break;
414    case TX_8X8:
415      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
416      break;
417    case TX_4X4:
418      // this is like vp9_short_idct4x4 but has a special case around eob<=1
419      // which is significant (not just an optimization) for the lossless
420      // case.
421      xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
422      break;
423    default:
424      assert(0 && "Invalid transform size");
425  }
426}
427
428static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
429                               TX_SIZE tx_size, void *arg) {
430  MACROBLOCK *const x = (MACROBLOCK *)arg;
431  MACROBLOCKD *const xd = &x->e_mbd;
432  struct macroblock_plane *const p = &x->plane[plane];
433  struct macroblockd_plane *const pd = &xd->plane[plane];
434  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
435  int i, j;
436  uint8_t *dst;
437  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
438  dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
439
440  vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
441
442  if (p->eobs[block] > 0)
443    xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
444}
445
446void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
447  vp9_subtract_plane(x, bsize, 0);
448  vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
449                                         encode_block_pass1, x);
450}
451
452void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
453  MACROBLOCKD *const xd = &x->e_mbd;
454  struct optimize_ctx ctx;
455  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
456  struct encode_b_args arg = {x, &ctx, &mbmi->skip};
457  int plane;
458
459  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
460    if (!x->skip_recode)
461      vp9_subtract_plane(x, bsize, plane);
462
463    if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
464      const struct macroblockd_plane* const pd = &xd->plane[plane];
465      const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size;
466      vp9_get_entropy_contexts(bsize, tx_size, pd,
467                               ctx.ta[plane], ctx.tl[plane]);
468    }
469
470    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
471                                           &arg);
472  }
473}
474
475static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
476                               TX_SIZE tx_size, void *arg) {
477  struct encode_b_args* const args = arg;
478  MACROBLOCK *const x = args->x;
479  MACROBLOCKD *const xd = &x->e_mbd;
480  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
481  struct macroblock_plane *const p = &x->plane[plane];
482  struct macroblockd_plane *const pd = &xd->plane[plane];
483  int16_t *coeff = BLOCK_OFFSET(p->coeff, block);
484  int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
485  int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
486  const scan_order *scan_order;
487  TX_TYPE tx_type;
488  MB_PREDICTION_MODE mode;
489  const int bwl = b_width_log2(plane_bsize);
490  const int diff_stride = 4 * (1 << bwl);
491  uint8_t *src, *dst;
492  int16_t *src_diff;
493  uint16_t *eob = &p->eobs[block];
494  const int src_stride = p->src.stride;
495  const int dst_stride = pd->dst.stride;
496  int i, j;
497  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
498  dst = &pd->dst.buf[4 * (j * dst_stride + i)];
499  src = &p->src.buf[4 * (j * src_stride + i)];
500  src_diff = &p->src_diff[4 * (j * diff_stride + i)];
501
502  switch (tx_size) {
503    case TX_32X32:
504      scan_order = &vp9_default_scan_orders[TX_32X32];
505      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
506      vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
507                              x->skip_encode ? src : dst,
508                              x->skip_encode ? src_stride : dst_stride,
509                              dst, dst_stride, i, j, plane);
510      if (!x->skip_recode) {
511        vp9_subtract_block(32, 32, src_diff, diff_stride,
512                           src, src_stride, dst, dst_stride);
513        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
514        vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
515                             p->quant, p->quant_shift, qcoeff, dqcoeff,
516                             pd->dequant, p->zbin_extra, eob, scan_order->scan,
517                             scan_order->iscan);
518      }
519      if (!x->skip_encode && *eob)
520        vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
521      break;
522    case TX_16X16:
523      tx_type = get_tx_type(pd->plane_type, xd);
524      scan_order = &vp9_scan_orders[TX_16X16][tx_type];
525      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
526      vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
527                              x->skip_encode ? src : dst,
528                              x->skip_encode ? src_stride : dst_stride,
529                              dst, dst_stride, i, j, plane);
530      if (!x->skip_recode) {
531        vp9_subtract_block(16, 16, src_diff, diff_stride,
532                           src, src_stride, dst, dst_stride);
533        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
534        vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
535                       p->quant, p->quant_shift, qcoeff, dqcoeff,
536                       pd->dequant, p->zbin_extra, eob, scan_order->scan,
537                       scan_order->iscan);
538      }
539      if (!x->skip_encode && *eob)
540        vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
541      break;
542    case TX_8X8:
543      tx_type = get_tx_type(pd->plane_type, xd);
544      scan_order = &vp9_scan_orders[TX_8X8][tx_type];
545      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
546      vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
547                              x->skip_encode ? src : dst,
548                              x->skip_encode ? src_stride : dst_stride,
549                              dst, dst_stride, i, j, plane);
550      if (!x->skip_recode) {
551        vp9_subtract_block(8, 8, src_diff, diff_stride,
552                           src, src_stride, dst, dst_stride);
553        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
554        vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
555                       p->quant_shift, qcoeff, dqcoeff,
556                       pd->dequant, p->zbin_extra, eob, scan_order->scan,
557                       scan_order->iscan);
558      }
559      if (!x->skip_encode && *eob)
560        vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
561      break;
562    case TX_4X4:
563      tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
564      scan_order = &vp9_scan_orders[TX_4X4][tx_type];
565      mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
566      vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
567                              x->skip_encode ? src : dst,
568                              x->skip_encode ? src_stride : dst_stride,
569                              dst, dst_stride, i, j, plane);
570
571      if (!x->skip_recode) {
572        vp9_subtract_block(4, 4, src_diff, diff_stride,
573                           src, src_stride, dst, dst_stride);
574        if (tx_type != DCT_DCT)
575          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
576        else
577          x->fwd_txm4x4(src_diff, coeff, diff_stride);
578        vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
579                       p->quant_shift, qcoeff, dqcoeff,
580                       pd->dequant, p->zbin_extra, eob, scan_order->scan,
581                       scan_order->iscan);
582      }
583
584      if (!x->skip_encode && *eob) {
585        if (tx_type == DCT_DCT)
586          // this is like vp9_short_idct4x4 but has a special case around eob<=1
587          // which is significant (not just an optimization) for the lossless
588          // case.
589          xd->itxm_add(dqcoeff, dst, dst_stride, *eob);
590        else
591          vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
592      }
593      break;
594    default:
595      assert(0);
596  }
597  if (*eob)
598    *(args->skip) = 0;
599}
600
601void vp9_encode_block_intra(MACROBLOCK *x, int plane, int block,
602                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
603                            unsigned char *skip) {
604  struct encode_b_args arg = {x, NULL, skip};
605  encode_block_intra(plane, block, plane_bsize, tx_size, &arg);
606}
607
608
609void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
610  const MACROBLOCKD *const xd = &x->e_mbd;
611  struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip};
612
613  vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_intra,
614                                         &arg);
615}
616
617int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) {
618  MB_MODE_INFO * mbmi = &x->e_mbd.mi[0]->mbmi;
619  x->skip_encode = 0;
620  mbmi->mode = DC_PRED;
621  mbmi->ref_frame[0] = INTRA_FRAME;
622  mbmi->tx_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_16X16 ? TX_16X16
623                                                                 : TX_8X8)
624                                   : TX_4X4;
625  vp9_encode_intra_block_plane(x, mbmi->sb_type, 0);
626  return vp9_get_mb_ss(x->plane[0].src_diff);
627}
628