vp9_encodemb.c revision 1184aebb761cbeac9124c37189a80a1a58f04b6b
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
11ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "./vpx_config.h"
12ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/encoder/vp9_encodemb.h"
13ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/common/vp9_reconinter.h"
14ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/encoder/vp9_quantize.h"
15ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/encoder/vp9_tokenize.h"
16ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/common/vp9_reconintra.h"
17ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vpx_mem/vpx_mem.h"
18ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/encoder/vp9_rdopt.h"
19ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/common/vp9_systemdependent.h"
20ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9_rtcd.h"
21ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
22ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangDECLARE_ALIGNED(16, extern const uint8_t,
23ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);
24ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/* Computes the per-sample residual of a rows x cols block: every output
 * value is the source sample minus the prediction sample at the same
 * position.  Each buffer is walked with its own stride (counted in
 * elements), so the three buffers may have different row pitches.
 *
 *   rows, cols              block height and width in samples
 *   diff_ptr, diff_stride   destination for the signed differences
 *   src_ptr, src_stride     source samples
 *   pred_ptr, pred_stride   prediction samples
 */
void vp9_subtract_block_c(int rows, int cols,
                          int16_t *diff_ptr, ptrdiff_t diff_stride,
                          const uint8_t *src_ptr, ptrdiff_t src_stride,
                          const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
  int row;

  for (row = 0; row < rows; ++row) {
    int16_t *const diff_row = diff_ptr + row * diff_stride;
    const uint8_t *const src_row = src_ptr + row * src_stride;
    const uint8_t *const pred_row = pred_ptr + row * pred_stride;
    int col;

    for (col = 0; col < cols; ++col)
      diff_row[col] = src_row[col] - pred_row[col];
  }
}
40ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
41ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangstatic void inverse_transform_b_4x4_add(MACROBLOCKD *xd, int eob,
42ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                        int16_t *dqcoeff, uint8_t *dest,
43ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                        int stride) {
44ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (eob <= 1)
45ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    xd->inv_txm4x4_1_add(dqcoeff, dest, stride);
46ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  else
47ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    xd->inv_txm4x4_add(dqcoeff, dest, stride);
48ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
49ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/* Picks the 8x8 inverse-transform-and-add routine from the end-of-block
 * position and applies it to dqcoeff/dest:
 *   eob > 10       -> vp9_short_idct8x8_add
 *   1 < eob <= 10  -> vp9_short_idct10_8x8_add
 *   eob <= 1       -> vp9_short_idct8x8_1_add
 */
static void inverse_transform_b_8x8_add(int eob,
                                        int16_t *dqcoeff, uint8_t *dest,
                                        int stride) {
  if (eob > 10) {
    vp9_short_idct8x8_add(dqcoeff, dest, stride);
  } else if (eob > 1) {
    vp9_short_idct10_8x8_add(dqcoeff, dest, stride);
  } else {
    vp9_short_idct8x8_1_add(dqcoeff, dest, stride);
  }
}
60f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang
/* Picks the 16x16 inverse-transform-and-add routine from the end-of-block
 * position and applies it to dqcoeff/dest:
 *   eob > 10       -> vp9_short_idct16x16_add
 *   1 < eob <= 10  -> vp9_short_idct10_16x16_add
 *   eob <= 1       -> vp9_short_idct16x16_1_add
 */
static void inverse_transform_b_16x16_add(int eob,
                                          int16_t *dqcoeff, uint8_t *dest,
                                          int stride) {
  if (eob > 10) {
    vp9_short_idct16x16_add(dqcoeff, dest, stride);
  } else if (eob > 1) {
    vp9_short_idct10_16x16_add(dqcoeff, dest, stride);
  } else {
    vp9_short_idct16x16_1_add(dqcoeff, dest, stride);
  }
}
71ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
721184aebb761cbeac9124c37189a80a1a58f04b6bhkuangstatic void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
73ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  struct macroblock_plane *const p = &x->plane[plane];
74ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const MACROBLOCKD *const xd = &x->e_mbd;
75ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const struct macroblockd_plane *const pd = &xd->plane[plane];
76ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const int bw = plane_block_width(bsize, pd);
77ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const int bh = plane_block_height(bsize, pd);
78ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
79ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  vp9_subtract_block(bh, bw, p->src_diff, bw,
80ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                     p->src.buf, p->src.stride,
81ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                     pd->dst.buf, pd->dst.stride);
82ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
83ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
841184aebb761cbeac9124c37189a80a1a58f04b6bhkuangvoid vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE bsize) {
85ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  subtract_plane(x, bsize, 0);
86ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
87ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
881184aebb761cbeac9124c37189a80a1a58f04b6bhkuangvoid vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize) {
89ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int i;
90ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
91ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (i = 1; i < MAX_MB_PLANE; i++)
92ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    subtract_plane(x, bsize, i);
93ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
94ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
951184aebb761cbeac9124c37189a80a1a58f04b6bhkuangvoid vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
96ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  vp9_subtract_sby(x, bsize);
97ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  vp9_subtract_sbuv(x, bsize);
98ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
99ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
100ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/* Tie-breaker used by UPDATE_RD_COST(): the low eight bits of
 * 128 + R * RM.  The DM and D arguments are accepted but not used.
 */
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
typedef struct vp9_token_state vp9_token_state;

/* One candidate node of the trellis built by optimize_b(). */
struct vp9_token_state {
  int           rate;   /* accumulated rate from this node to the end */
  int           error;  /* accumulated squared error from this node on */
  int           next;   /* index of the following trellis node */
  signed char   token;  /* token recorded for this node */
  short         qc;     /* quantized coefficient value of this candidate */
};

// TODO: experiments to find optimal multiple numbers
#define Y1_RD_MULT 4
#define UV_RD_MULT 2

/* Per-plane-type multiplier applied to the macroblock rdmult in
 * optimize_b().  Only the first two entries are given explicitly; the
 * remaining two are zero-initialized.
 */
static const int plane_rd_mult[4] = {
  Y1_RD_MULT,
  UV_RD_MULT,
};

/* Recomputes rd_cost0 / rd_cost1 from the caller's locals rdmult, rddiv,
 * rate0/rate1 and error0/error1.  When both costs come out equal they are
 * replaced by their RDTRUNC() values so that the comparison that follows
 * can still separate them.
 *
 * Wrapped in do { } while (0) so that "UPDATE_RD_COST();" is exactly one
 * statement and stays valid in an unbraced if/else.
 */
#define UPDATE_RD_COST()\
do {\
  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
  if (rd_cost0 == rd_cost1) {\
    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
  }\
} while (0)
130ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
131ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang// This function is a place holder for now but may ultimately need
132ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang// to scan previous tokens to work out the correct context.
13391037db265ecdd914a26e056cf69207b4f50924ehkuangstatic int trellis_get_coeff_context(const int16_t *scan,
13491037db265ecdd914a26e056cf69207b4f50924ehkuang                                     const int16_t *nb,
135ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                     int idx, int token,
13691037db265ecdd914a26e056cf69207b4f50924ehkuang                                     uint8_t *token_cache) {
137ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int bak = token_cache[scan[idx]], pt;
138ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  token_cache[scan[idx]] = vp9_pt_energy_class[token];
13991037db265ecdd914a26e056cf69207b4f50924ehkuang  pt = get_coef_context(nb, token_cache, idx + 1);
140ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  token_cache[scan[idx]] = bak;
141ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return pt;
142ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
143ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
144f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuangstatic void optimize_b(MACROBLOCK *mb,
1451184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                       int plane, int block, BLOCK_SIZE plane_bsize,
146ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
147ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                       TX_SIZE tx_size) {
148ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MACROBLOCKD *const xd = &mb->e_mbd;
1491184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  struct macroblockd_plane *pd = &xd->plane[plane];
1501184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int ref = is_inter_block(&xd->this_mi->mbmi);
151ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  vp9_token_state tokens[1025][2];
152ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned best_index[1025][2];
1531184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block);
154ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int16_t *qcoeff_ptr;
155ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int16_t *dqcoeff_ptr;
1561184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int eob = pd->eobs[block], final_eob, sz = 0;
157ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const int i0 = 0;
158ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int rc, x, next, i;
159ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int64_t rdmult, rddiv, rd_cost0, rd_cost1;
160ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int rate0, rate1, error0, error1, t0, t1;
161ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int best, band, pt;
1621184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  PLANE_TYPE type = pd->plane_type;
163ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int err_mult = plane_rd_mult[type];
16491037db265ecdd914a26e056cf69207b4f50924ehkuang  int default_eob;
16591037db265ecdd914a26e056cf69207b4f50924ehkuang  const int16_t *scan, *nb;
166ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const int mul = 1 + (tx_size == TX_32X32);
167ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t token_cache[1024];
1681184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int ib = txfrm_block_to_raster_block(plane_bsize, tx_size, block);
1691184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int16_t *dequant_ptr = pd->dequant;
170ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const uint8_t * band_translate;
171ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
172ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  assert((!type && !plane) || (type && plane));
1731184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block);
1741184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
175ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  switch (tx_size) {
176ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    default:
17791037db265ecdd914a26e056cf69207b4f50924ehkuang    case TX_4X4:
178ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      default_eob = 16;
17991037db265ecdd914a26e056cf69207b4f50924ehkuang      scan = get_scan_4x4(get_tx_type_4x4(type, xd, ib));
180ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      band_translate = vp9_coefband_trans_4x4;
181ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
18291037db265ecdd914a26e056cf69207b4f50924ehkuang    case TX_8X8:
18391037db265ecdd914a26e056cf69207b4f50924ehkuang      scan = get_scan_8x8(get_tx_type_8x8(type, xd));
184ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      default_eob = 64;
185ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      band_translate = vp9_coefband_trans_8x8plus;
186ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
18791037db265ecdd914a26e056cf69207b4f50924ehkuang    case TX_16X16:
18891037db265ecdd914a26e056cf69207b4f50924ehkuang      scan = get_scan_16x16(get_tx_type_16x16(type, xd));
189ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      default_eob = 256;
190ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      band_translate = vp9_coefband_trans_8x8plus;
191ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
192ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    case TX_32X32:
193ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      scan = vp9_default_scan_32x32;
194ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      default_eob = 1024;
195ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      band_translate = vp9_coefband_trans_8x8plus;
196ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
197ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
198ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  assert(eob <= default_eob);
199ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
200ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
201ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  rdmult = mb->rdmult * err_mult;
2021184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (mb->e_mbd.mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME)
203ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    rdmult = (rdmult * 9) >> 4;
204ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  rddiv = mb->rddiv;
205ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  /* Initialize the sentinel node of the trellis. */
206ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  tokens[eob][0].rate = 0;
207ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  tokens[eob][0].error = 0;
208ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  tokens[eob][0].next = default_eob;
209ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  tokens[eob][0].token = DCT_EOB_TOKEN;
210ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  tokens[eob][0].qc = 0;
211ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  *(tokens[eob] + 1) = *(tokens[eob] + 0);
212ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  next = eob;
213ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (i = 0; i < eob; i++)
214ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[
215ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        qcoeff_ptr[scan[i]]].token];
21691037db265ecdd914a26e056cf69207b4f50924ehkuang  nb = vp9_get_coef_neighbors_handle(scan);
217ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
218ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (i = eob; i-- > i0;) {
219ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    int base_bits, d2, dx;
220ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
221ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    rc = scan[i];
222ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x = qcoeff_ptr[rc];
223ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /* Only add a trellis state for non-zero coefficients. */
224ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (x) {
225ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      int shortcut = 0;
226ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      error0 = tokens[next][0].error;
227ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      error1 = tokens[next][1].error;
228ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      /* Evaluate the first possibility for this state. */
229ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      rate0 = tokens[next][0].rate;
230ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      rate1 = tokens[next][1].rate;
231ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      t0 = (vp9_dct_value_tokens_ptr + x)->token;
232ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      /* Consider both possible successor states. */
233ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (next < default_eob) {
234ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        band = get_coef_band(band_translate, i + 1);
23591037db265ecdd914a26e056cf69207b4f50924ehkuang        pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
236ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        rate0 +=
237f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang          mb->token_costs[tx_size][type][ref][band][0][pt]
23891037db265ecdd914a26e056cf69207b4f50924ehkuang                         [tokens[next][0].token];
239ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        rate1 +=
240f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang          mb->token_costs[tx_size][type][ref][band][0][pt]
24191037db265ecdd914a26e056cf69207b4f50924ehkuang                         [tokens[next][1].token];
242ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
243ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      UPDATE_RD_COST();
244ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      /* And pick the best. */
245ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      best = rd_cost1 < rd_cost0;
246ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      base_bits = *(vp9_dct_value_cost_ptr + x);
247ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      dx = mul * (dqcoeff_ptr[rc] - coeff_ptr[rc]);
248ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      d2 = dx * dx;
249ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
250ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      tokens[i][0].error = d2 + (best ? error1 : error0);
251ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      tokens[i][0].next = next;
252ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      tokens[i][0].token = t0;
253ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      tokens[i][0].qc = x;
254ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      best_index[i][0] = best;
255ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
256ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      /* Evaluate the second possibility for this state. */
257ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      rate0 = tokens[next][0].rate;
258ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      rate1 = tokens[next][1].rate;
259ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
260ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc]) * mul) &&
261ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) * mul +
262ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                         dequant_ptr[rc != 0]))
263ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        shortcut = 1;
264ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      else
265ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        shortcut = 0;
266ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
267ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (shortcut) {
268ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        sz = -(x < 0);
269ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        x -= 2 * sz + 1;
270ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
271ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
272ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      /* Consider both possible successor states. */
273ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (!x) {
274ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        /* If we reduced this coefficient to zero, check to see if
275ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         *  we need to move the EOB back here.
276ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         */
277ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
278ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang             DCT_EOB_TOKEN : ZERO_TOKEN;
279ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
280ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang             DCT_EOB_TOKEN : ZERO_TOKEN;
281ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      } else {
282ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token;
283ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
284ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (next < default_eob) {
285ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        band = get_coef_band(band_translate, i + 1);
286ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (t0 != DCT_EOB_TOKEN) {
28791037db265ecdd914a26e056cf69207b4f50924ehkuang          pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
288f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang          rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
28991037db265ecdd914a26e056cf69207b4f50924ehkuang                                  [tokens[next][0].token];
290ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
291ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (t1 != DCT_EOB_TOKEN) {
29291037db265ecdd914a26e056cf69207b4f50924ehkuang          pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
293f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang          rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
29491037db265ecdd914a26e056cf69207b4f50924ehkuang                                  [tokens[next][1].token];
295ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
296ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
297ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
298ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      UPDATE_RD_COST();
299ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      /* And pick the best. */
300ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      best = rd_cost1 < rd_cost0;
301ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      base_bits = *(vp9_dct_value_cost_ptr + x);
302ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
303ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (shortcut) {
304ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
305ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        d2 = dx * dx;
306ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
307ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
308ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      tokens[i][1].error = d2 + (best ? error1 : error0);
309ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      tokens[i][1].next = next;
310ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      tokens[i][1].token = best ? t1 : t0;
311ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      tokens[i][1].qc = x;
312ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      best_index[i][1] = best;
313ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      /* Finally, make this the new head of the trellis. */
314ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      next = i;
315ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
316ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /* There's no choice to make for a zero coefficient, so we don't
317ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *  add a new trellis node, but we do need to update the costs.
318ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
319ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    else {
320ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      band = get_coef_band(band_translate, i + 1);
321ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      t0 = tokens[next][0].token;
322ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      t1 = tokens[next][1].token;
323ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      /* Update the cost of each path if we're past the EOB token. */
324ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (t0 != DCT_EOB_TOKEN) {
325ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        tokens[next][0].rate +=
326f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang            mb->token_costs[tx_size][type][ref][band][1][0][t0];
327ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        tokens[next][0].token = ZERO_TOKEN;
328ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
329ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (t1 != DCT_EOB_TOKEN) {
330ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        tokens[next][1].rate +=
331f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang            mb->token_costs[tx_size][type][ref][band][1][0][t1];
332ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        tokens[next][1].token = ZERO_TOKEN;
333ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
33491037db265ecdd914a26e056cf69207b4f50924ehkuang      best_index[i][0] = best_index[i][1] = 0;
335ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      /* Don't update next, because we didn't add a new node. */
336ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
337ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
338ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
339ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  /* Now pick the best path through the whole trellis. */
340ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  band = get_coef_band(band_translate, i + 1);
341ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  pt = combine_entropy_contexts(*a, *l);
342ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  rate0 = tokens[next][0].rate;
343ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  rate1 = tokens[next][1].rate;
344ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  error0 = tokens[next][0].error;
345ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  error1 = tokens[next][1].error;
346ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  t0 = tokens[next][0].token;
347ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  t1 = tokens[next][1].token;
348f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0];
349f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1];
350ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  UPDATE_RD_COST();
351ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best = rd_cost1 < rd_cost0;
352ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  final_eob = i0 - 1;
353ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  vpx_memset(qcoeff_ptr, 0, sizeof(*qcoeff_ptr) * (16 << (tx_size * 2)));
354ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  vpx_memset(dqcoeff_ptr, 0, sizeof(*dqcoeff_ptr) * (16 << (tx_size * 2)));
355ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (i = next; i < eob; i = next) {
356ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x = tokens[i][best].qc;
357ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (x) {
358ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      final_eob = i;
359ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
360ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    rc = scan[i];
361ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    qcoeff_ptr[rc] = x;
362ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul;
363ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
364ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    next = tokens[i][best].next;
365ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    best = best_index[i][best];
366ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
367ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  final_eob++;
368ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
369ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  xd->plane[plane].eobs[block] = final_eob;
370ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  *a = *l = (final_eob > 0);
371ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
372ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
3731184aebb761cbeac9124c37189a80a1a58f04b6bhkuangvoid vp9_optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
3741184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                    TX_SIZE tx_size, MACROBLOCK *mb, struct optimize_ctx *ctx) {
375ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int x, y;
3761184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
3771184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  optimize_b(mb, plane, block, plane_bsize,
3781184aebb761cbeac9124c37189a80a1a58f04b6bhkuang             &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size);
379ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
380ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
3811184aebb761cbeac9124c37189a80a1a58f04b6bhkuangstatic void optimize_init_b(int plane, BLOCK_SIZE bsize,
3821184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                            struct encode_b_args *args) {
383f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  const MACROBLOCKD *xd = &args->x->e_mbd;
384f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  const struct macroblockd_plane* const pd = &xd->plane[plane];
3851184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
3861184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
3871184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
3881184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const MB_MODE_INFO *mbmi = &xd->this_mi->mbmi;
3891184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size;
390f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  int i;
391f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang
392f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  switch (tx_size) {
393f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang    case TX_4X4:
394f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang      vpx_memcpy(args->ctx->ta[plane], pd->above_context,
3951184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                 sizeof(ENTROPY_CONTEXT) * num_4x4_w);
396f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang      vpx_memcpy(args->ctx->tl[plane], pd->left_context,
3971184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                 sizeof(ENTROPY_CONTEXT) * num_4x4_h);
398f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang      break;
399f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang    case TX_8X8:
4001184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      for (i = 0; i < num_4x4_w; i += 2)
401f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang        args->ctx->ta[plane][i] = !!*(uint16_t *)&pd->above_context[i];
4021184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      for (i = 0; i < num_4x4_h; i += 2)
403f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang        args->ctx->tl[plane][i] = !!*(uint16_t *)&pd->left_context[i];
404f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang      break;
405f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang    case TX_16X16:
4061184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      for (i = 0; i < num_4x4_w; i += 4)
407f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang        args->ctx->ta[plane][i] = !!*(uint32_t *)&pd->above_context[i];
4081184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      for (i = 0; i < num_4x4_h; i += 4)
409f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang        args->ctx->tl[plane][i] = !!*(uint32_t *)&pd->left_context[i];
410f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang      break;
411f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang    case TX_32X32:
4121184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      for (i = 0; i < num_4x4_w; i += 8)
413f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang        args->ctx->ta[plane][i] = !!*(uint64_t *)&pd->above_context[i];
4141184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      for (i = 0; i < num_4x4_h; i += 8)
415f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang        args->ctx->tl[plane][i] = !!*(uint64_t *)&pd->left_context[i];
416f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang      break;
417f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang    default:
418f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang      assert(0);
419ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
420ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
421ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
4221184aebb761cbeac9124c37189a80a1a58f04b6bhkuangvoid vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
4231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                     TX_SIZE tx_size, void *arg) {
424ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  struct encode_b_args* const args = arg;
425ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MACROBLOCK* const x = args->x;
426ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MACROBLOCKD* const xd = &x->e_mbd;
42791037db265ecdd914a26e056cf69207b4f50924ehkuang  struct macroblock_plane *const p = &x->plane[plane];
42891037db265ecdd914a26e056cf69207b4f50924ehkuang  struct macroblockd_plane *const pd = &xd->plane[plane];
4291184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int16_t *coeff = BLOCK_OFFSET(p->coeff, block);
4301184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block);
4311184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
43291037db265ecdd914a26e056cf69207b4f50924ehkuang  const int16_t *scan, *iscan;
43391037db265ecdd914a26e056cf69207b4f50924ehkuang  uint16_t *eob = &pd->eobs[block];
4341184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl;
43591037db265ecdd914a26e056cf69207b4f50924ehkuang  const int twl = bwl - tx_size, twmask = (1 << twl) - 1;
43691037db265ecdd914a26e056cf69207b4f50924ehkuang  int xoff, yoff;
43791037db265ecdd914a26e056cf69207b4f50924ehkuang  int16_t *src_diff;
438ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
43991037db265ecdd914a26e056cf69207b4f50924ehkuang  switch (tx_size) {
440ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    case TX_32X32:
44191037db265ecdd914a26e056cf69207b4f50924ehkuang      scan = vp9_default_scan_32x32;
44291037db265ecdd914a26e056cf69207b4f50924ehkuang      iscan = vp9_default_iscan_32x32;
44391037db265ecdd914a26e056cf69207b4f50924ehkuang      block >>= 6;
44491037db265ecdd914a26e056cf69207b4f50924ehkuang      xoff = 32 * (block & twmask);
44591037db265ecdd914a26e056cf69207b4f50924ehkuang      yoff = 32 * (block >> twl);
44691037db265ecdd914a26e056cf69207b4f50924ehkuang      src_diff = p->src_diff + 4 * bw * yoff + xoff;
4471184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      if (x->use_lp32x32fdct)
44891037db265ecdd914a26e056cf69207b4f50924ehkuang        vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8);
449ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      else
45091037db265ecdd914a26e056cf69207b4f50924ehkuang        vp9_short_fdct32x32(src_diff, coeff, bw * 8);
45191037db265ecdd914a26e056cf69207b4f50924ehkuang      vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
45291037db265ecdd914a26e056cf69207b4f50924ehkuang                           p->quant, p->quant_shift, qcoeff, dqcoeff,
45391037db265ecdd914a26e056cf69207b4f50924ehkuang                           pd->dequant, p->zbin_extra, eob, scan, iscan);
454ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
455ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    case TX_16X16:
45691037db265ecdd914a26e056cf69207b4f50924ehkuang      scan = vp9_default_scan_16x16;
45791037db265ecdd914a26e056cf69207b4f50924ehkuang      iscan = vp9_default_iscan_16x16;
45891037db265ecdd914a26e056cf69207b4f50924ehkuang      block >>= 4;
45991037db265ecdd914a26e056cf69207b4f50924ehkuang      xoff = 16 * (block & twmask);
46091037db265ecdd914a26e056cf69207b4f50924ehkuang      yoff = 16 * (block >> twl);
46191037db265ecdd914a26e056cf69207b4f50924ehkuang      src_diff = p->src_diff + 4 * bw * yoff + xoff;
46291037db265ecdd914a26e056cf69207b4f50924ehkuang      x->fwd_txm16x16(src_diff, coeff, bw * 8);
46391037db265ecdd914a26e056cf69207b4f50924ehkuang      vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
46491037db265ecdd914a26e056cf69207b4f50924ehkuang                     p->quant, p->quant_shift, qcoeff, dqcoeff,
46591037db265ecdd914a26e056cf69207b4f50924ehkuang                     pd->dequant, p->zbin_extra, eob, scan, iscan);
466ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
467ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    case TX_8X8:
46891037db265ecdd914a26e056cf69207b4f50924ehkuang      scan = vp9_default_scan_8x8;
46991037db265ecdd914a26e056cf69207b4f50924ehkuang      iscan = vp9_default_iscan_8x8;
47091037db265ecdd914a26e056cf69207b4f50924ehkuang      block >>= 2;
47191037db265ecdd914a26e056cf69207b4f50924ehkuang      xoff = 8 * (block & twmask);
47291037db265ecdd914a26e056cf69207b4f50924ehkuang      yoff = 8 * (block >> twl);
47391037db265ecdd914a26e056cf69207b4f50924ehkuang      src_diff = p->src_diff + 4 * bw * yoff + xoff;
47491037db265ecdd914a26e056cf69207b4f50924ehkuang      x->fwd_txm8x8(src_diff, coeff, bw * 8);
47591037db265ecdd914a26e056cf69207b4f50924ehkuang      vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
47691037db265ecdd914a26e056cf69207b4f50924ehkuang                     p->quant, p->quant_shift, qcoeff, dqcoeff,
47791037db265ecdd914a26e056cf69207b4f50924ehkuang                     pd->dequant, p->zbin_extra, eob, scan, iscan);
478ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
479ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    case TX_4X4:
48091037db265ecdd914a26e056cf69207b4f50924ehkuang      scan = vp9_default_scan_4x4;
48191037db265ecdd914a26e056cf69207b4f50924ehkuang      iscan = vp9_default_iscan_4x4;
48291037db265ecdd914a26e056cf69207b4f50924ehkuang      xoff = 4 * (block & twmask);
48391037db265ecdd914a26e056cf69207b4f50924ehkuang      yoff = 4 * (block >> twl);
48491037db265ecdd914a26e056cf69207b4f50924ehkuang      src_diff = p->src_diff + 4 * bw * yoff + xoff;
48591037db265ecdd914a26e056cf69207b4f50924ehkuang      x->fwd_txm4x4(src_diff, coeff, bw * 8);
48691037db265ecdd914a26e056cf69207b4f50924ehkuang      vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
48791037db265ecdd914a26e056cf69207b4f50924ehkuang                     p->quant, p->quant_shift, qcoeff, dqcoeff,
48891037db265ecdd914a26e056cf69207b4f50924ehkuang                     pd->dequant, p->zbin_extra, eob, scan, iscan);
489ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
490ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    default:
491ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      assert(0);
492ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
493ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
494ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
4951184aebb761cbeac9124c37189a80a1a58f04b6bhkuangstatic void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
4961184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                         TX_SIZE tx_size, void *arg) {
497ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  struct encode_b_args *const args = arg;
498ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MACROBLOCK *const x = args->x;
499ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MACROBLOCKD *const xd = &x->e_mbd;
500ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  struct macroblockd_plane *const pd = &xd->plane[plane];
5011184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size,
5021184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                                       block);
5031184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
5041184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
5051184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block,
506ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                                 pd->dst.buf, pd->dst.stride);
5071184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
508ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
509ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (x->optimize)
5101184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx);
511ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5121184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (x->skip_encode || pd->eobs[block] == 0)
51391037db265ecdd914a26e056cf69207b4f50924ehkuang    return;
51491037db265ecdd914a26e056cf69207b4f50924ehkuang
5151184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  switch (tx_size) {
516ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    case TX_32X32:
517ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride);
518ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
519ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    case TX_16X16:
520f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang      inverse_transform_b_16x16_add(pd->eobs[block], dqcoeff, dst,
521f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang                                    pd->dst.stride);
522ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
523ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    case TX_8X8:
524f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang      inverse_transform_b_8x8_add(pd->eobs[block], dqcoeff, dst,
525f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang                                  pd->dst.stride);
526ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
527ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    case TX_4X4:
52891037db265ecdd914a26e056cf69207b4f50924ehkuang      // this is like vp9_short_idct4x4 but has a special case around eob<=1
52991037db265ecdd914a26e056cf69207b4f50924ehkuang      // which is significant (not just an optimization) for the lossless
53091037db265ecdd914a26e056cf69207b4f50924ehkuang      // case.
53191037db265ecdd914a26e056cf69207b4f50924ehkuang      inverse_transform_b_4x4_add(xd, pd->eobs[block], dqcoeff,
53291037db265ecdd914a26e056cf69207b4f50924ehkuang                                  dst, pd->dst.stride);
533ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
5341184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    default:
5351184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      assert(!"Invalid transform size");
536ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
537ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
538ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5391184aebb761cbeac9124c37189a80a1a58f04b6bhkuangvoid vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) {
540ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MACROBLOCKD *const xd = &x->e_mbd;
541ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  struct optimize_ctx ctx;
5421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  struct encode_b_args arg = {x, &ctx};
543ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
544ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  vp9_subtract_sby(x, bsize);
545ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (x->optimize)
546f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang    optimize_init_b(0, bsize, &arg);
547ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
548ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  foreach_transformed_block_in_plane(xd, bsize, 0, encode_block, &arg);
549ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
550ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5511184aebb761cbeac9124c37189a80a1a58f04b6bhkuangvoid vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
552ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MACROBLOCKD *const xd = &x->e_mbd;
553ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  struct optimize_ctx ctx;
5541184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  struct encode_b_args arg = {x, &ctx};
555ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
556ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  vp9_subtract_sb(x, bsize);
557f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang
558f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  if (x->optimize) {
559f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang    int i;
560f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang    for (i = 0; i < MAX_MB_PLANE; ++i)
561f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang      optimize_init_b(i, bsize, &arg);
562f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  }
563ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
564ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  foreach_transformed_block(xd, bsize, encode_block, &arg);
565ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
566ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5671184aebb761cbeac9124c37189a80a1a58f04b6bhkuangvoid vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
5681184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                            TX_SIZE tx_size, void *arg) {
569ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  struct encode_b_args* const args = arg;
570ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MACROBLOCK *const x = args->x;
571ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MACROBLOCKD *const xd = &x->e_mbd;
5721184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  MB_MODE_INFO *mbmi = &xd->this_mi->mbmi;
573ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  struct macroblock_plane *const p = &x->plane[plane];
574ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  struct macroblockd_plane *const pd = &xd->plane[plane];
5751184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int16_t *coeff = BLOCK_OFFSET(p->coeff, block);
5761184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block);
5771184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
57891037db265ecdd914a26e056cf69207b4f50924ehkuang  const int16_t *scan, *iscan;
579ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  TX_TYPE tx_type;
58091037db265ecdd914a26e056cf69207b4f50924ehkuang  MB_PREDICTION_MODE mode;
5811184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl;
58291037db265ecdd914a26e056cf69207b4f50924ehkuang  const int twl = bwl - tx_size, twmask = (1 << twl) - 1;
58391037db265ecdd914a26e056cf69207b4f50924ehkuang  int xoff, yoff;
58491037db265ecdd914a26e056cf69207b4f50924ehkuang  uint8_t *src, *dst;
58591037db265ecdd914a26e056cf69207b4f50924ehkuang  int16_t *src_diff;
58691037db265ecdd914a26e056cf69207b4f50924ehkuang  uint16_t *eob = &pd->eobs[block];
587ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5881184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0)
5891184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    extend_for_intra(xd, plane_bsize, plane, block, tx_size);
590ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
591ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // if (x->optimize)
5921184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx);
593ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
59491037db265ecdd914a26e056cf69207b4f50924ehkuang  switch (tx_size) {
595ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    case TX_32X32:
59691037db265ecdd914a26e056cf69207b4f50924ehkuang      scan = vp9_default_scan_32x32;
59791037db265ecdd914a26e056cf69207b4f50924ehkuang      iscan = vp9_default_iscan_32x32;
59891037db265ecdd914a26e056cf69207b4f50924ehkuang      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
59991037db265ecdd914a26e056cf69207b4f50924ehkuang      block >>= 6;
60091037db265ecdd914a26e056cf69207b4f50924ehkuang      xoff = 32 * (block & twmask);
60191037db265ecdd914a26e056cf69207b4f50924ehkuang      yoff = 32 * (block >> twl);
60291037db265ecdd914a26e056cf69207b4f50924ehkuang      dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
60391037db265ecdd914a26e056cf69207b4f50924ehkuang      src = p->src.buf + yoff * p->src.stride + xoff;
60491037db265ecdd914a26e056cf69207b4f50924ehkuang      src_diff = p->src_diff + 4 * bw * yoff + xoff;
60591037db265ecdd914a26e056cf69207b4f50924ehkuang      vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode,
60691037db265ecdd914a26e056cf69207b4f50924ehkuang                              dst, pd->dst.stride, dst, pd->dst.stride);
60791037db265ecdd914a26e056cf69207b4f50924ehkuang      vp9_subtract_block(32, 32, src_diff, bw * 4,
60891037db265ecdd914a26e056cf69207b4f50924ehkuang                         src, p->src.stride, dst, pd->dst.stride);
6091184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      if (x->use_lp32x32fdct)
61091037db265ecdd914a26e056cf69207b4f50924ehkuang        vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8);
61191037db265ecdd914a26e056cf69207b4f50924ehkuang      else
61291037db265ecdd914a26e056cf69207b4f50924ehkuang        vp9_short_fdct32x32(src_diff, coeff, bw * 8);
61391037db265ecdd914a26e056cf69207b4f50924ehkuang      vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
61491037db265ecdd914a26e056cf69207b4f50924ehkuang                           p->quant, p->quant_shift, qcoeff, dqcoeff,
61591037db265ecdd914a26e056cf69207b4f50924ehkuang                           pd->dequant, p->zbin_extra, eob, scan, iscan);
61691037db265ecdd914a26e056cf69207b4f50924ehkuang      if (!x->skip_encode && *eob)
617ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride);
618ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
619ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    case TX_16X16:
62091037db265ecdd914a26e056cf69207b4f50924ehkuang      tx_type = get_tx_type_16x16(pd->plane_type, xd);
62191037db265ecdd914a26e056cf69207b4f50924ehkuang      scan = get_scan_16x16(tx_type);
62291037db265ecdd914a26e056cf69207b4f50924ehkuang      iscan = get_iscan_16x16(tx_type);
62391037db265ecdd914a26e056cf69207b4f50924ehkuang      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
62491037db265ecdd914a26e056cf69207b4f50924ehkuang      block >>= 4;
62591037db265ecdd914a26e056cf69207b4f50924ehkuang      xoff = 16 * (block & twmask);
62691037db265ecdd914a26e056cf69207b4f50924ehkuang      yoff = 16 * (block >> twl);
62791037db265ecdd914a26e056cf69207b4f50924ehkuang      dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
62891037db265ecdd914a26e056cf69207b4f50924ehkuang      src = p->src.buf + yoff * p->src.stride + xoff;
62991037db265ecdd914a26e056cf69207b4f50924ehkuang      src_diff = p->src_diff + 4 * bw * yoff + xoff;
63091037db265ecdd914a26e056cf69207b4f50924ehkuang      vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode,
63191037db265ecdd914a26e056cf69207b4f50924ehkuang                              dst, pd->dst.stride, dst, pd->dst.stride);
63291037db265ecdd914a26e056cf69207b4f50924ehkuang      vp9_subtract_block(16, 16, src_diff, bw * 4,
63391037db265ecdd914a26e056cf69207b4f50924ehkuang                         src, p->src.stride, dst, pd->dst.stride);
63491037db265ecdd914a26e056cf69207b4f50924ehkuang      if (tx_type != DCT_DCT)
63591037db265ecdd914a26e056cf69207b4f50924ehkuang        vp9_short_fht16x16(src_diff, coeff, bw * 4, tx_type);
636ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      else
63791037db265ecdd914a26e056cf69207b4f50924ehkuang        x->fwd_txm16x16(src_diff, coeff, bw * 8);
6381184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
6391184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                     p->quant, p->quant_shift, qcoeff, dqcoeff,
64091037db265ecdd914a26e056cf69207b4f50924ehkuang                     pd->dequant, p->zbin_extra, eob, scan, iscan);
64191037db265ecdd914a26e056cf69207b4f50924ehkuang      if (!x->skip_encode && *eob) {
64291037db265ecdd914a26e056cf69207b4f50924ehkuang        if (tx_type == DCT_DCT)
643f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang          inverse_transform_b_16x16_add(*eob, dqcoeff, dst, pd->dst.stride);
64491037db265ecdd914a26e056cf69207b4f50924ehkuang        else
64591037db265ecdd914a26e056cf69207b4f50924ehkuang          vp9_short_iht16x16_add(dqcoeff, dst, pd->dst.stride, tx_type);
64691037db265ecdd914a26e056cf69207b4f50924ehkuang      }
647ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
648ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    case TX_8X8:
64991037db265ecdd914a26e056cf69207b4f50924ehkuang      tx_type = get_tx_type_8x8(pd->plane_type, xd);
65091037db265ecdd914a26e056cf69207b4f50924ehkuang      scan = get_scan_8x8(tx_type);
65191037db265ecdd914a26e056cf69207b4f50924ehkuang      iscan = get_iscan_8x8(tx_type);
65291037db265ecdd914a26e056cf69207b4f50924ehkuang      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
65391037db265ecdd914a26e056cf69207b4f50924ehkuang      block >>= 2;
65491037db265ecdd914a26e056cf69207b4f50924ehkuang      xoff = 8 * (block & twmask);
65591037db265ecdd914a26e056cf69207b4f50924ehkuang      yoff = 8 * (block >> twl);
65691037db265ecdd914a26e056cf69207b4f50924ehkuang      dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
65791037db265ecdd914a26e056cf69207b4f50924ehkuang      src = p->src.buf + yoff * p->src.stride + xoff;
65891037db265ecdd914a26e056cf69207b4f50924ehkuang      src_diff = p->src_diff + 4 * bw * yoff + xoff;
65991037db265ecdd914a26e056cf69207b4f50924ehkuang      vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode,
66091037db265ecdd914a26e056cf69207b4f50924ehkuang                              dst, pd->dst.stride, dst, pd->dst.stride);
66191037db265ecdd914a26e056cf69207b4f50924ehkuang      vp9_subtract_block(8, 8, src_diff, bw * 4,
66291037db265ecdd914a26e056cf69207b4f50924ehkuang                         src, p->src.stride, dst, pd->dst.stride);
66391037db265ecdd914a26e056cf69207b4f50924ehkuang      if (tx_type != DCT_DCT)
66491037db265ecdd914a26e056cf69207b4f50924ehkuang        vp9_short_fht8x8(src_diff, coeff, bw * 4, tx_type);
665ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      else
66691037db265ecdd914a26e056cf69207b4f50924ehkuang        x->fwd_txm8x8(src_diff, coeff, bw * 8);
66791037db265ecdd914a26e056cf69207b4f50924ehkuang      vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
66891037db265ecdd914a26e056cf69207b4f50924ehkuang                     p->quant_shift, qcoeff, dqcoeff,
66991037db265ecdd914a26e056cf69207b4f50924ehkuang                     pd->dequant, p->zbin_extra, eob, scan, iscan);
67091037db265ecdd914a26e056cf69207b4f50924ehkuang      if (!x->skip_encode && *eob) {
67191037db265ecdd914a26e056cf69207b4f50924ehkuang        if (tx_type == DCT_DCT)
672f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang          inverse_transform_b_8x8_add(*eob, dqcoeff, dst, pd->dst.stride);
67391037db265ecdd914a26e056cf69207b4f50924ehkuang        else
67491037db265ecdd914a26e056cf69207b4f50924ehkuang          vp9_short_iht8x8_add(dqcoeff, dst, pd->dst.stride, tx_type);
67591037db265ecdd914a26e056cf69207b4f50924ehkuang      }
676ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
677ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    case TX_4X4:
67891037db265ecdd914a26e056cf69207b4f50924ehkuang      tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
67991037db265ecdd914a26e056cf69207b4f50924ehkuang      scan = get_scan_4x4(tx_type);
68091037db265ecdd914a26e056cf69207b4f50924ehkuang      iscan = get_iscan_4x4(tx_type);
681f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang      if (mbmi->sb_type < BLOCK_8X8 && plane == 0)
6821184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        mode = xd->this_mi->bmi[block].as_mode;
683f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang      else
68491037db265ecdd914a26e056cf69207b4f50924ehkuang        mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
685f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang
68691037db265ecdd914a26e056cf69207b4f50924ehkuang      xoff = 4 * (block & twmask);
68791037db265ecdd914a26e056cf69207b4f50924ehkuang      yoff = 4 * (block >> twl);
68891037db265ecdd914a26e056cf69207b4f50924ehkuang      dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
68991037db265ecdd914a26e056cf69207b4f50924ehkuang      src = p->src.buf + yoff * p->src.stride + xoff;
69091037db265ecdd914a26e056cf69207b4f50924ehkuang      src_diff = p->src_diff + 4 * bw * yoff + xoff;
69191037db265ecdd914a26e056cf69207b4f50924ehkuang      vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
69291037db265ecdd914a26e056cf69207b4f50924ehkuang                              dst, pd->dst.stride, dst, pd->dst.stride);
69391037db265ecdd914a26e056cf69207b4f50924ehkuang      vp9_subtract_block(4, 4, src_diff, bw * 4,
69491037db265ecdd914a26e056cf69207b4f50924ehkuang                         src, p->src.stride, dst, pd->dst.stride);
69591037db265ecdd914a26e056cf69207b4f50924ehkuang      if (tx_type != DCT_DCT)
69691037db265ecdd914a26e056cf69207b4f50924ehkuang        vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
697ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      else
69891037db265ecdd914a26e056cf69207b4f50924ehkuang        x->fwd_txm4x4(src_diff, coeff, bw * 8);
69991037db265ecdd914a26e056cf69207b4f50924ehkuang      vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
70091037db265ecdd914a26e056cf69207b4f50924ehkuang                     p->quant_shift, qcoeff, dqcoeff,
70191037db265ecdd914a26e056cf69207b4f50924ehkuang                     pd->dequant, p->zbin_extra, eob, scan, iscan);
70291037db265ecdd914a26e056cf69207b4f50924ehkuang      if (!x->skip_encode && *eob) {
70391037db265ecdd914a26e056cf69207b4f50924ehkuang        if (tx_type == DCT_DCT)
70491037db265ecdd914a26e056cf69207b4f50924ehkuang          // this is like vp9_short_idct4x4 but has a special case around eob<=1
70591037db265ecdd914a26e056cf69207b4f50924ehkuang          // which is significant (not just an optimization) for the lossless
70691037db265ecdd914a26e056cf69207b4f50924ehkuang          // case.
707f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang          inverse_transform_b_4x4_add(xd, *eob, dqcoeff, dst, pd->dst.stride);
70891037db265ecdd914a26e056cf69207b4f50924ehkuang        else
70991037db265ecdd914a26e056cf69207b4f50924ehkuang          vp9_short_iht4x4_add(dqcoeff, dst, pd->dst.stride, tx_type);
71091037db265ecdd914a26e056cf69207b4f50924ehkuang      }
711ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
71291037db265ecdd914a26e056cf69207b4f50924ehkuang    default:
71391037db265ecdd914a26e056cf69207b4f50924ehkuang      assert(0);
714ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
715ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
716ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
7171184aebb761cbeac9124c37189a80a1a58f04b6bhkuangvoid vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize) {
718ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MACROBLOCKD* const xd = &x->e_mbd;
719ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  struct optimize_ctx ctx;
7201184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  struct encode_b_args arg = {x, &ctx};
721ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
7221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  foreach_transformed_block_in_plane(xd, bsize, 0, vp9_encode_block_intra,
7231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                     &arg);
724ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
7251184aebb761cbeac9124c37189a80a1a58f04b6bhkuangvoid vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize) {
726ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MACROBLOCKD* const xd = &x->e_mbd;
727ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  struct optimize_ctx ctx;
7281184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  struct encode_b_args arg = {x, &ctx};
7291184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  foreach_transformed_block_uv(xd, bsize, vp9_encode_block_intra, &arg);
730ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
731ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
732