vp9_rdopt.c revision ba164dffc5a6795bce97fae02b51ccf3330e15e4
1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12#include <stdio.h>
13#include <math.h>
14#include <limits.h>
15#include <assert.h>
16
17#include "vp9/common/vp9_pragmas.h"
18#include "vp9/encoder/vp9_tokenize.h"
19#include "vp9/encoder/vp9_treewriter.h"
20#include "vp9/encoder/vp9_onyx_int.h"
21#include "vp9/encoder/vp9_modecosts.h"
22#include "vp9/encoder/vp9_encodeintra.h"
23#include "vp9/common/vp9_entropymode.h"
24#include "vp9/common/vp9_reconinter.h"
25#include "vp9/common/vp9_reconintra.h"
26#include "vp9/common/vp9_findnearmv.h"
27#include "vp9/common/vp9_quant_common.h"
28#include "vp9/encoder/vp9_encodemb.h"
29#include "vp9/encoder/vp9_quantize.h"
30#include "vp9/encoder/vp9_variance.h"
31#include "vp9/encoder/vp9_mcomp.h"
32#include "vp9/encoder/vp9_rdopt.h"
33#include "vp9/encoder/vp9_ratectrl.h"
34#include "vpx_mem/vpx_mem.h"
35#include "vp9/common/vp9_systemdependent.h"
36#include "vp9/encoder/vp9_encodemv.h"
37#include "vp9/common/vp9_seg_common.h"
38#include "vp9/common/vp9_pred_common.h"
39#include "vp9/common/vp9_entropy.h"
40#include "vp9_rtcd.h"
41#include "vp9/common/vp9_mvref_common.h"
42#include "vp9/common/vp9_common.h"
43
// Motion-vector sentinel value. It is not referenced in the visible part of
// this file. NOTE(review): presumably marks an unset/invalid MV -- confirm at
// the use sites.
#define INVALID_MV 0x80008000

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

// Per-token lookup table, declared extern here. cost_coeffs() stores its
// entries in the token cache while walking the coefficients of a block.
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

// Pseudo prediction-mode values used as mode entries in vp9_mode_order[].
#define I4X4_PRED 0x8000
#define SPLITMV 0x10000
54
// Table of the MAX_MODES mode / reference-frame combinations. Each entry
// lists, in order: the prediction mode, the first reference frame and the
// second reference frame (NONE for intra and single-reference entries).
// The index into this table is the same mode index used for
// cpi->sf.thresh_mult[] and cpi->rd_threshes[][] in
// vp9_initialize_rd_consts().
// NOTE(review): presumably also the order in which the RD mode search visits
// the candidates -- confirm at the use sites.
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},

  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},

  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},

  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},

  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},

  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},

  {TM_PRED,   INTRA_FRAME,  NONE},

  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},

  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},

  {I4X4_PRED, INTRA_FRAME,  NONE},

  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},

  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {SPLITMV,   LAST_FRAME,   ALTREF_FRAME},
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
};
107
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size, one entry per block size
// type.
// The factors are stored multiplied by 4 (2 = x0.5, 32 = x8, etc.);
// vp9_initialize_rd_consts() divides by 4 again when it applies them.
static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

// Value every cpi->rd_thresh_freq_fact[][] entry is reset to by
// vp9_initialize_rd_consts().
#define BASE_RD_THRESH_FREQ_FACT 16
// Not referenced in the visible part of this file.
// NOTE(review): presumably the upper bound and step used when the frequency
// factor is adapted -- confirm at the use sites.
#define MAX_RD_THRESH_FREQ_FACT 32
#define MAX_RD_THRESH_FREQ_INC 1
118
119static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES],
120                             vp9_coeff_count (*cnoskip)[BLOCK_TYPES],
121                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
122  int i, j, k, l;
123  TX_SIZE t;
124  for (t = TX_4X4; t <= TX_32X32; t++)
125    for (i = 0; i < BLOCK_TYPES; i++)
126      for (j = 0; j < REF_TYPES; j++)
127        for (k = 0; k < COEF_BANDS; k++)
128          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
129            vp9_prob probs[ENTROPY_NODES];
130            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
131            vp9_cost_tokens((int *)cnoskip[t][i][j][k][l], probs,
132                            vp9_coef_tree);
133#if CONFIG_BALANCED_COEFTREE
134            // Replace the eob node prob with a very small value so that the
135            // cost approximately equals the cost without the eob node
136            probs[1] = 1;
137            vp9_cost_tokens((int *)c[t][i][j][k][l], probs, vp9_coef_tree);
138#else
139            vp9_cost_tokens_skip((int *)c[t][i][j][k][l], probs,
140                                 vp9_coef_tree);
141            assert(c[t][i][j][k][l][DCT_EOB_TOKEN] ==
142                   cnoskip[t][i][j][k][l][DCT_EOB_TOKEN]);
143#endif
144          }
145}
146
// RDMULT boost in 1/16 units, indexed by cpi->twopass.next_iiratio (values
// above 31 use the last entry). vp9_initialize_rd_consts() applies it only
// in pass 2 on non-key frames.
static int rd_iifactor[32] =  { 4, 4, 3, 2, 1, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, };

// 3* dc_qlookup[Q]*dc_qlookup[Q];

/* values are now correlated to quantizer */
// SAD-per-bit tables indexed by quantizer index; filled by
// vp9_init_me_luts() and read by vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];
157
158void vp9_init_me_luts() {
159  int i;
160
161  // Initialize the sad lut tables using a formulaic calculation for now
162  // This is to make it easier to resolve the impact of experimental changes
163  // to the quantizer tables.
164  for (i = 0; i < QINDEX_RANGE; i++) {
165    sad_per_bit16lut[i] =
166      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
167    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
168  }
169}
170
// Returns the base rate-distortion multiplier for a quantizer index:
// 11/4 times the square of the DC quantizer value for qindex.
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int scaled = 11 * dc_q * dc_q;

  return scaled >> 2;
}
175
176void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
177  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
178  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
179}
180
181
182void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
183  int q, i, bsize;
184
185  vp9_clear_system_state();  // __asm emms;
186
187  // Further tests required to see if optimum is different
188  // for key frames, golden frames and arf frames.
189  // if (cpi->common.refresh_golden_frame ||
190  //     cpi->common.refresh_alt_ref_frame)
191  qindex = clamp(qindex, 0, MAXQ);
192
193  cpi->RDMULT = compute_rd_mult(qindex);
194  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
195    if (cpi->twopass.next_iiratio > 31)
196      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
197    else
198      cpi->RDMULT +=
199          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
200  }
201  cpi->mb.errorperbit = cpi->RDMULT >> 6;
202  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
203
204  vp9_set_speed_features(cpi);
205
206  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
207  q <<= 2;
208  if (q < 8)
209    q = 8;
210
211  if (cpi->RDMULT > 1000) {
212    cpi->RDDIV = 1;
213    cpi->RDMULT /= 100;
214
215    for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
216      for (i = 0; i < MAX_MODES; ++i) {
217        // Threshold here seem unecessarily harsh but fine given actual
218        // range of values used for cpi->sf.thresh_mult[]
219        int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
220
221        // *4 relates to the scaling of rd_thresh_block_size_factor[]
222        if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
223          cpi->rd_threshes[bsize][i] =
224            cpi->sf.thresh_mult[i] * q *
225            rd_thresh_block_size_factor[bsize] / (4 * 100);
226        } else {
227          cpi->rd_threshes[bsize][i] = INT_MAX;
228        }
229        cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
230        cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
231      }
232    }
233  } else {
234    cpi->RDDIV = 100;
235
236    for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
237      for (i = 0; i < MAX_MODES; i++) {
238        // Threshold here seem unecessarily harsh but fine given actual
239        // range of values used for cpi->sf.thresh_mult[]
240        int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
241
242        if (cpi->sf.thresh_mult[i] < thresh_max) {
243          cpi->rd_threshes[bsize][i] =
244            cpi->sf.thresh_mult[i] * q *
245            rd_thresh_block_size_factor[bsize] / 4;
246        } else {
247          cpi->rd_threshes[bsize][i] = INT_MAX;
248        }
249        cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
250        cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
251      }
252    }
253  }
254
255  fill_token_costs(cpi->mb.token_costs,
256                   cpi->mb.token_costs_noskip,
257                   cpi->common.fc.coef_probs);
258
259  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
260    vp9_cost_tokens(cpi->mb.partition_cost[i],
261                    cpi->common.fc.partition_prob[cpi->common.frame_type][i],
262                    vp9_partition_tree);
263
264  /*rough estimate for costing*/
265  vp9_init_mode_costs(cpi);
266
267  if (cpi->common.frame_type != KEY_FRAME) {
268    vp9_build_nmv_cost_table(
269        cpi->mb.nmvjointcost,
270        cpi->mb.e_mbd.allow_high_precision_mv ?
271        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
272        &cpi->common.fc.nmvc,
273        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
274  }
275}
276
// Returns the sum of squared differences between the first block_size
// entries of coeff and dqcoeff.
//
// The sum is accumulated in 64 bits and saturated to INT_MAX, so extreme
// inputs no longer overflow a signed int (a single difference of 65535
// already squares to more than INT_MAX).
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int64_t this_diff = (int64_t)coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}
287
288static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
289                              int plane, int block, PLANE_TYPE type,
290                              ENTROPY_CONTEXT *A,
291                              ENTROPY_CONTEXT *L,
292                              TX_SIZE tx_size,
293                              int y_blocks) {
294  MACROBLOCKD *const xd = &mb->e_mbd;
295  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
296  int pt;
297  int c = 0;
298  int cost = 0, pad;
299  const int *scan, *nb;
300  const int eob = xd->plane[plane].eobs[block];
301  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
302                                           block, 16);
303  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
304  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
305      mb->token_costs[tx_size][type][ref];
306  ENTROPY_CONTEXT above_ec, left_ec;
307  TX_TYPE tx_type = DCT_DCT;
308
309  const int segment_id = xd->mode_info_context->mbmi.segment_id;
310  unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
311      mb->token_costs_noskip[tx_size][type][ref];
312
313  int seg_eob, default_eob;
314  uint8_t token_cache[1024];
315  const uint8_t * band_translate;
316
317  // Check for consistency of tx_size with mode info
318  assert((!type && !plane) || (type && plane));
319  if (type == PLANE_TYPE_Y_WITH_DC) {
320    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
321  } else {
322    TX_SIZE tx_size_uv = get_uv_tx_size(mbmi);
323    assert(tx_size == tx_size_uv);
324  }
325
326  switch (tx_size) {
327    case TX_4X4: {
328      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
329          get_tx_type_4x4(xd, block) : DCT_DCT;
330      above_ec = A[0] != 0;
331      left_ec = L[0] != 0;
332      seg_eob = 16;
333      scan = get_scan_4x4(tx_type);
334      band_translate = vp9_coefband_trans_4x4;
335      break;
336    }
337    case TX_8X8: {
338      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
339      const int sz = 1 + b_width_log2(sb_type);
340      const int x = block & ((1 << sz) - 1), y = block - x;
341      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
342          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
343      above_ec = (A[0] + A[1]) != 0;
344      left_ec = (L[0] + L[1]) != 0;
345      scan = get_scan_8x8(tx_type);
346      seg_eob = 64;
347      band_translate = vp9_coefband_trans_8x8plus;
348      break;
349    }
350    case TX_16X16: {
351      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
352      const int sz = 2 + b_width_log2(sb_type);
353      const int x = block & ((1 << sz) - 1), y = block - x;
354      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
355          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
356      scan = get_scan_16x16(tx_type);
357      seg_eob = 256;
358      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
359      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
360      band_translate = vp9_coefband_trans_8x8plus;
361      break;
362    }
363    case TX_32X32:
364      scan = vp9_default_scan_32x32;
365      seg_eob = 1024;
366      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
367      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
368      band_translate = vp9_coefband_trans_8x8plus;
369      break;
370    default:
371      abort();
372      break;
373  }
374  assert(eob <= seg_eob);
375
376  pt = combine_entropy_contexts(above_ec, left_ec);
377  nb = vp9_get_coef_neighbors_handle(scan, &pad);
378  default_eob = seg_eob;
379
380  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
381    seg_eob = 0;
382
383  /* sanity check to ensure that we do not have spurious non-zero q values */
384  if (eob < seg_eob)
385    assert(qcoeff_ptr[scan[eob]] == 0);
386
387  {
388    for (c = 0; c < eob; c++) {
389      int v = qcoeff_ptr[scan[c]];
390      int t = vp9_dct_value_tokens_ptr[v].token;
391      int band = get_coef_band(band_translate, c);
392      if (c)
393        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
394
395      if (!c || token_cache[scan[c - 1]])  // do not skip eob
396        cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
397      else
398        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
399      token_cache[scan[c]] = vp9_pt_energy_class[t];
400    }
401    if (c < seg_eob) {
402      if (c)
403        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
404      cost += mb->token_costs_noskip[tx_size][type][ref]
405          [get_coef_band(band_translate, c)]
406          [pt][DCT_EOB_TOKEN];
407    }
408  }
409
410  // is eob first coefficient;
411  for (pt = 0; pt < (1 << tx_size); pt++) {
412    A[pt] = L[pt] = c > 0;
413  }
414
415  return cost;
416}
417
418static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
419                                     int (*r)[2], int *rate,
420                                     int *d, int *distortion,
421                                     int *s, int *skip,
422                                     int64_t txfm_cache[NB_TXFM_MODES],
423                                     TX_SIZE max_txfm_size) {
424  VP9_COMMON *const cm = &cpi->common;
425  MACROBLOCKD *const xd = &x->e_mbd;
426  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
427  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
428  int64_t rd[TX_SIZE_MAX_SB][2];
429  int n, m;
430  int s0, s1;
431
432  const vp9_prob *tx_probs = vp9_get_pred_probs(cm, xd, PRED_TX_SIZE);
433
434  for (n = TX_4X4; n <= max_txfm_size; n++) {
435    r[n][1] = r[n][0];
436    for (m = 0; m <= n - (n == max_txfm_size); m++) {
437      if (m == n)
438        r[n][1] += vp9_cost_zero(tx_probs[m]);
439      else
440        r[n][1] += vp9_cost_one(tx_probs[m]);
441    }
442  }
443
444  assert(skip_prob > 0);
445  s0 = vp9_cost_bit(skip_prob, 0);
446  s1 = vp9_cost_bit(skip_prob, 1);
447
448  for (n = TX_4X4; n <= max_txfm_size; n++) {
449    if (s[n]) {
450      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
451    } else {
452      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
453      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
454    }
455  }
456
457  if (max_txfm_size == TX_32X32 &&
458      (cm->txfm_mode == ALLOW_32X32 ||
459       (cm->txfm_mode == TX_MODE_SELECT &&
460        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
461        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
462    mbmi->txfm_size = TX_32X32;
463  } else if (max_txfm_size >= TX_16X16 &&
464             (cm->txfm_mode == ALLOW_16X16 ||
465              cm->txfm_mode == ALLOW_32X32 ||
466              (cm->txfm_mode == TX_MODE_SELECT &&
467               rd[TX_16X16][1] < rd[TX_8X8][1] &&
468               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
469    mbmi->txfm_size = TX_16X16;
470  } else if (cm->txfm_mode == ALLOW_8X8 ||
471             cm->txfm_mode == ALLOW_16X16 ||
472             cm->txfm_mode == ALLOW_32X32 ||
473           (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
474    mbmi->txfm_size = TX_8X8;
475  } else {
476    mbmi->txfm_size = TX_4X4;
477  }
478
479  *distortion = d[mbmi->txfm_size];
480  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
481  *skip       = s[mbmi->txfm_size];
482
483  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
484  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
485  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
486  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
487  if (max_txfm_size == TX_32X32 &&
488      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
489      rd[TX_32X32][1] < rd[TX_4X4][1])
490    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
491  else if (max_txfm_size >= TX_16X16 &&
492           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
493    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
494  else
495    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
496                                 rd[TX_4X4][1] : rd[TX_8X8][1];
497}
498
// Returns the sum of squared differences between the first block_size
// entries of coeff and dqcoeff, shifted right by shift bits and saturated
// to INT_MAX.
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
  int64_t sse = 0;
  int k;

  for (k = 0; k < block_size; ++k) {
    // A difference of two int16_t values squares to less than 2^32, so the
    // 64-bit product is exact.
    const int64_t diff = coeff[k] - dqcoeff[k];
    sse += diff * diff;
  }
  sse >>= shift;

  if (sse > INT_MAX)
    return INT_MAX;
  return (int)sse;
}
512
513static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
514  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
515  return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
516                     16 << (bwl + bhl), shift);
517}
518
519static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
520  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
521  int64_t sum = 0;
522  int plane;
523
524  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
525    const int subsampling = x->e_mbd.plane[plane].subsampling_x +
526                            x->e_mbd.plane[plane].subsampling_y;
527    sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
528                       16 << (bwl + bhl - subsampling), 0);
529  }
530  sum >>= shift;
531  return sum > INT_MAX ? INT_MAX : (int)sum;
532}
533
// State shared between rdcost_plane() and its per-block callback
// rdcost_block(). rdcost_plane() initializes the fields positionally, so
// their order must not change.
struct rdcost_block_args {
  VP9_COMMON *cm;               // passed through to cost_coeffs()
  MACROBLOCK *x;                // macroblock whose coefficients are costed
  ENTROPY_CONTEXT t_above[16];  // scratch copy of the plane's above context
  ENTROPY_CONTEXT t_left[16];   // scratch copy of the plane's left context
  TX_SIZE tx_size;              // transform size used for every block
  int bw;   // 1 << (block width log2 - subsampling_x), from rdcost_plane()
  int bh;   // 1 << (block height log2 - subsampling_y), from rdcost_plane()
  int cost;                     // running sum of the cost_coeffs() results
};
544
545static void rdcost_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
546                         int ss_txfrm_size, void *arg) {
547  struct rdcost_block_args* args = arg;
548  int x_idx, y_idx;
549  MACROBLOCKD * const xd = &args->x->e_mbd;
550
551  txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2, &x_idx,
552                           &y_idx);
553
554  args->cost += cost_coeffs(args->cm, args->x, plane, block,
555                            xd->plane[plane].plane_type, args->t_above + x_idx,
556                            args->t_left + y_idx, args->tx_size,
557                            args->bw * args->bh);
558}
559
560static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
561                        BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
562  MACROBLOCKD * const xd = &x->e_mbd;
563  const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
564  const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
565  const int bw = 1 << bwl, bh = 1 << bhl;
566  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh, 0 };
567
568  vpx_memcpy(&args.t_above, xd->plane[plane].above_context,
569             sizeof(ENTROPY_CONTEXT) * bw);
570  vpx_memcpy(&args.t_left, xd->plane[plane].left_context,
571             sizeof(ENTROPY_CONTEXT) * bh);
572
573  foreach_transformed_block_in_plane(xd, bsize, plane, rdcost_block, &args);
574
575  return args.cost;
576}
577
578static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
579                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
580  int cost = 0, plane;
581
582  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
583    cost += rdcost_plane(cm, x, plane, bsize, tx_size);
584  }
585  return cost;
586}
587
588static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
589                                     int *rate, int *distortion, int *skippable,
590                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
591  MACROBLOCKD *const xd = &x->e_mbd;
592  xd->mode_info_context->mbmi.txfm_size = tx_size;
593
594  if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME)
595    vp9_encode_intra_block_y(cm, x, bsize);
596  else
597    vp9_xform_quant_sby(cm, x, bsize);
598
599  *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2);
600  *rate       = rdcost_plane(cm, x, 0, bsize, tx_size);
601  *skippable  = vp9_sby_is_skippable(xd, bsize);
602}
603
604static void super_block_yrd(VP9_COMP *cpi,
605                            MACROBLOCK *x, int *rate, int *distortion,
606                            int *skip, BLOCK_SIZE_TYPE bs,
607                            int64_t txfm_cache[NB_TXFM_MODES]) {
608  VP9_COMMON *const cm = &cpi->common;
609  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
610  MACROBLOCKD *xd = &x->e_mbd;
611  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
612
613  assert(bs == mbmi->sb_type);
614  if (mbmi->ref_frame[0] > INTRA_FRAME)
615    vp9_subtract_sby(x, bs);
616
617  if (cpi->speed > 4) {
618    if (bs >= BLOCK_SIZE_SB32X32) {
619      mbmi->txfm_size = TX_32X32;
620    } else if (bs >= BLOCK_SIZE_MB16X16) {
621      mbmi->txfm_size = TX_16X16;
622    } else if (bs >= BLOCK_SIZE_SB8X8) {
623      mbmi->txfm_size = TX_8X8;
624    } else {
625      mbmi->txfm_size = TX_4X4;
626    }
627    vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t));
628    super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
629                             mbmi->txfm_size);
630    return;
631  }
632  if (bs >= BLOCK_SIZE_SB32X32)
633    super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
634                             bs, TX_32X32);
635  if (bs >= BLOCK_SIZE_MB16X16)
636    super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
637                             bs, TX_16X16);
638  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
639                           TX_8X8);
640  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
641                           TX_4X4);
642
643  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
644                           skip, txfm_cache,
645                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
646                           - (bs < BLOCK_SIZE_MB16X16));
647}
648
649static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
650                                     MB_PREDICTION_MODE *best_mode,
651                                     int *bmode_costs,
652                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
653                                     int *bestrate, int *bestratey,
654                                     int *bestdistortion,
655                                     BLOCK_SIZE_TYPE bsize) {
656  MB_PREDICTION_MODE mode;
657  MACROBLOCKD *xd = &x->e_mbd;
658  int64_t best_rd = INT64_MAX;
659  int rate = 0;
660  int distortion;
661  VP9_COMMON *const cm = &cpi->common;
662  const int src_stride = x->plane[0].src.stride;
663  uint8_t *src, *dst;
664  int16_t *src_diff, *coeff;
665
666  ENTROPY_CONTEXT ta[2], tempa[2];
667  ENTROPY_CONTEXT tl[2], templ[2];
668  TX_TYPE tx_type = DCT_DCT;
669  TX_TYPE best_tx_type = DCT_DCT;
670  int bw = 1 << b_width_log2(bsize);
671  int bh = 1 << b_height_log2(bsize);
672  int idx, idy, block;
673  DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
674
675  assert(ib < 4);
676
677  vpx_memcpy(ta, a, sizeof(ta));
678  vpx_memcpy(tl, l, sizeof(tl));
679  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
680
681  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
682    int64_t this_rd;
683    int ratey = 0;
684
685    rate = bmode_costs[mode];
686    distortion = 0;
687
688    vpx_memcpy(tempa, ta, sizeof(ta));
689    vpx_memcpy(templ, tl, sizeof(tl));
690
691    for (idy = 0; idy < bh; ++idy) {
692      for (idx = 0; idx < bw; ++idx) {
693        block = ib + idy * 2 + idx;
694        xd->mode_info_context->bmi[block].as_mode.first = mode;
695        src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
696                                        x->plane[0].src.buf, src_stride);
697        src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
698                                             x->plane[0].src_diff);
699        coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
700        dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
701                                        xd->plane[0].dst.buf,
702                                        xd->plane[0].dst.stride);
703        vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode,
704                             dst, xd->plane[0].dst.stride);
705        vp9_subtract_block(4, 4, src_diff, 8,
706                           src, src_stride,
707                           dst, xd->plane[0].dst.stride);
708
709        tx_type = get_tx_type_4x4(xd, block);
710        if (tx_type != DCT_DCT) {
711          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
712          x->quantize_b_4x4(x, block, tx_type, 16);
713        } else {
714          x->fwd_txm4x4(src_diff, coeff, 16);
715          x->quantize_b_4x4(x, block, tx_type, 16);
716        }
717
718        ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
719                             tempa + idx, templ + idy, TX_4X4, 16);
720        distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
721                                                         block, 16), 16) >> 2;
722
723        if (best_tx_type != DCT_DCT)
724          vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
725                               dst, xd->plane[0].dst.stride, best_tx_type);
726        else
727          xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
728                             dst, xd->plane[0].dst.stride);
729      }
730    }
731
732    rate += ratey;
733    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
734
735    if (this_rd < best_rd) {
736      *bestrate = rate;
737      *bestratey = ratey;
738      *bestdistortion = distortion;
739      best_rd = this_rd;
740      *best_mode = mode;
741      best_tx_type = tx_type;
742      vpx_memcpy(a, tempa, sizeof(tempa));
743      vpx_memcpy(l, templ, sizeof(templ));
744      for (idy = 0; idy < bh; ++idy) {
745        for (idx = 0; idx < bw; ++idx) {
746          block = ib + idy * 2 + idx;
747          vpx_memcpy(best_dqcoeff[idy * 2 + idx],
748                     BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
749                     sizeof(best_dqcoeff[0]));
750        }
751      }
752    }
753  }
754
755  for (idy = 0; idy < bh; ++idy) {
756    for (idx = 0; idx < bw; ++idx) {
757      block = ib + idy * 2 + idx;
758      xd->mode_info_context->bmi[block].as_mode.first = *best_mode;
759      dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
760                                      xd->plane[0].dst.buf,
761                                      xd->plane[0].dst.stride);
762
763      vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode,
764                           dst, xd->plane[0].dst.stride);
765      // inverse transform
766      if (best_tx_type != DCT_DCT)
767        vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
768                             xd->plane[0].dst.stride, best_tx_type);
769      else
770        xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
771                           xd->plane[0].dst.stride);
772    }
773  }
774
775  return best_rd;
776}
777
778static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
779                                         int *Rate, int *rate_y,
780                                         int *Distortion, int64_t best_rd) {
781  int i, j;
782  MACROBLOCKD *const xd = &mb->e_mbd;
783  BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
784  int bw = 1 << b_width_log2(bsize);
785  int bh = 1 << b_height_log2(bsize);
786  int idx, idy;
787  int cost = 0;
788  int distortion = 0;
789  int tot_rate_y = 0;
790  int64_t total_rd = 0;
791  ENTROPY_CONTEXT t_above[4], t_left[4];
792  int *bmode_costs;
793  MODE_INFO *const mic = xd->mode_info_context;
794
795  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
796  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
797
798  bmode_costs = mb->mbmode_cost;
799
800  for (idy = 0; idy < 2; idy += bh) {
801    for (idx = 0; idx < 2; idx += bw) {
802      const int mis = xd->mode_info_stride;
803      MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
804      int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
805      int UNINITIALIZED_IS_SAFE(d);
806      i = idy * 2 + idx;
807
808      if (xd->frame_type == KEY_FRAME) {
809        const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis);
810        const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
811                                     left_block_mode(mic, i) : DC_PRED;
812
813        bmode_costs  = mb->y_mode_costs[A][L];
814      }
815
816      total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
817                                        t_above + idx, t_left + idy,
818                                        &r, &ry, &d, bsize);
819      cost += r;
820      distortion += d;
821      tot_rate_y += ry;
822
823      mic->bmi[i].as_mode.first = best_mode;
824      for (j = 1; j < bh; ++j)
825        mic->bmi[i + j * 2].as_mode.first = best_mode;
826      for (j = 1; j < bw; ++j)
827        mic->bmi[i + j].as_mode.first = best_mode;
828
829      if (total_rd >= best_rd)
830        break;
831    }
832  }
833
834  if (total_rd >= best_rd)
835    return INT64_MAX;
836
837  *Rate = cost;
838  *rate_y = tot_rate_y;
839  *Distortion = distortion;
840  xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode.first;
841
842  return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
843}
844
845static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
846                                      int *rate, int *rate_tokenonly,
847                                      int *distortion, int *skippable,
848                                      BLOCK_SIZE_TYPE bsize,
849                                      int64_t txfm_cache[NB_TXFM_MODES]) {
850  MB_PREDICTION_MODE mode;
851  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
852  MACROBLOCKD *const xd = &x->e_mbd;
853  int this_rate, this_rate_tokenonly;
854  int this_distortion, s;
855  int64_t best_rd = INT64_MAX, this_rd;
856  TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
857  int i;
858  int *bmode_costs = x->mbmode_cost;
859
860  if (bsize < BLOCK_SIZE_SB8X8) {
861    x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
862    return best_rd;
863  }
864
865  for (i = 0; i < NB_TXFM_MODES; i++)
866    txfm_cache[i] = INT64_MAX;
867
868  /* Y Search for 32x32 intra prediction mode */
869  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
870    int64_t local_txfm_cache[NB_TXFM_MODES];
871    MODE_INFO *const mic = xd->mode_info_context;
872    const int mis = xd->mode_info_stride;
873
874    if (cpi->common.frame_type == KEY_FRAME) {
875      const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
876      const MB_PREDICTION_MODE L = xd->left_available ?
877                                   left_block_mode(mic, 0) : DC_PRED;
878
879      bmode_costs = x->y_mode_costs[A][L];
880    }
881    x->e_mbd.mode_info_context->mbmi.mode = mode;
882
883    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
884                    bsize, local_txfm_cache);
885
886    this_rate = this_rate_tokenonly + bmode_costs[mode];
887    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
888
889    if (this_rd < best_rd) {
890      mode_selected   = mode;
891      best_rd         = this_rd;
892      best_tx         = x->e_mbd.mode_info_context->mbmi.txfm_size;
893      *rate           = this_rate;
894      *rate_tokenonly = this_rate_tokenonly;
895      *distortion     = this_distortion;
896      *skippable      = s;
897    }
898
899    for (i = 0; i < NB_TXFM_MODES; i++) {
900      int64_t adj_rd = this_rd + local_txfm_cache[i] -
901                       local_txfm_cache[cpi->common.txfm_mode];
902      if (adj_rd < txfm_cache[i]) {
903        txfm_cache[i] = adj_rd;
904      }
905    }
906  }
907
908  x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
909  x->e_mbd.mode_info_context->mbmi.txfm_size = best_tx;
910
911  return best_rd;
912}
913
914static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
915                                      int *rate, int *distortion,
916                                      int *skippable, BLOCK_SIZE_TYPE bsize,
917                                      TX_SIZE uv_tx_size) {
918  MACROBLOCKD *const xd = &x->e_mbd;
919  if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME)
920    vp9_encode_intra_block_uv(cm, x, bsize);
921  else
922    vp9_xform_quant_sbuv(cm, x, bsize);
923
924  *distortion = block_error_sbuv(x, bsize, uv_tx_size == TX_32X32 ? 0 : 2);
925  *rate       = rdcost_uv(cm, x, bsize, uv_tx_size);
926  *skippable  = vp9_sbuv_is_skippable(xd, bsize);
927}
928
929static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
930                             int *rate, int *distortion, int *skippable,
931                             BLOCK_SIZE_TYPE bsize) {
932  MACROBLOCKD *const xd = &x->e_mbd;
933  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
934
935  if (mbmi->ref_frame[0] > INTRA_FRAME)
936    vp9_subtract_sbuv(x, bsize);
937
938  if (mbmi->txfm_size >= TX_32X32 && bsize >= BLOCK_SIZE_SB64X64) {
939    super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
940                              TX_32X32);
941  } else if (mbmi->txfm_size >= TX_16X16 && bsize >= BLOCK_SIZE_SB32X32) {
942    super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
943                              TX_16X16);
944  } else if (mbmi->txfm_size >= TX_8X8 && bsize >= BLOCK_SIZE_MB16X16) {
945    super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
946                              TX_8X8);
947  } else {
948    super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
949                              TX_4X4);
950  }
951}
952
953static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
954                                       int *rate, int *rate_tokenonly,
955                                       int *distortion, int *skippable,
956                                       BLOCK_SIZE_TYPE bsize) {
957  MB_PREDICTION_MODE mode;
958  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
959  int64_t best_rd = INT64_MAX, this_rd;
960  int this_rate_tokenonly, this_rate;
961  int this_distortion, s;
962
963  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
964    x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
965    super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
966                     &this_distortion, &s, bsize);
967    this_rate = this_rate_tokenonly +
968                x->intra_uv_mode_cost[x->e_mbd.frame_type][mode];
969    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
970
971    if (this_rd < best_rd) {
972      mode_selected   = mode;
973      best_rd         = this_rd;
974      *rate           = this_rate;
975      *rate_tokenonly = this_rate_tokenonly;
976      *distortion     = this_distortion;
977      *skippable      = s;
978    }
979  }
980
981  x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
982
983  return best_rd;
984}
985
986int vp9_cost_mv_ref(VP9_COMP *cpi,
987                    MB_PREDICTION_MODE m,
988                    const int mode_context) {
989  MACROBLOCKD *xd = &cpi->mb.e_mbd;
990  int segment_id = xd->mode_info_context->mbmi.segment_id;
991
992  // Dont account for mode here if segment skip is enabled.
993  if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
994    VP9_COMMON *pc = &cpi->common;
995    assert(NEARESTMV <= m  &&  m <= NEWMV);
996    return cost_token(vp9_sb_mv_ref_tree,
997                      pc->fc.inter_mode_probs[mode_context],
998                      vp9_sb_mv_ref_encoding_array - NEARESTMV + m);
999  } else
1000    return 0;
1001}
1002
1003void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
1004  x->e_mbd.mode_info_context->mbmi.mode = mb;
1005  x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int;
1006}
1007
1008static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1009                                BLOCK_SIZE_TYPE bsize,
1010                                int_mv *frame_mv,
1011                                int mi_row, int mi_col,
1012                                int_mv single_newmv[MAX_REF_FRAMES],
1013                                int *rate_mv);
1014static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1015                                 BLOCK_SIZE_TYPE bsize,
1016                                 int mi_row, int mi_col,
1017                                 int_mv *tmp_mv, int *rate_mv);
1018
1019static int labels2mode(MACROBLOCK *x, int i,
1020                       MB_PREDICTION_MODE this_mode,
1021                       int_mv *this_mv, int_mv *this_second_mv,
1022                       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1023                       int_mv seg_mvs[MAX_REF_FRAMES],
1024                       int_mv *best_ref_mv,
1025                       int_mv *second_best_ref_mv,
1026                       int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
1027  MACROBLOCKD *const xd = &x->e_mbd;
1028  MODE_INFO *const mic = xd->mode_info_context;
1029  MB_MODE_INFO * mbmi = &mic->mbmi;
1030  int cost = 0, thismvcost = 0;
1031  int idx, idy;
1032  int bw = 1 << b_width_log2(mbmi->sb_type);
1033  int bh = 1 << b_height_log2(mbmi->sb_type);
1034
1035  /* We have to be careful retrieving previously-encoded motion vectors.
1036   Ones from this macroblock have to be pulled from the BLOCKD array
1037   as they have not yet made it to the bmi array in our MB_MODE_INFO. */
1038  MB_PREDICTION_MODE m;
1039
1040  // the only time we should do costing for new motion vector or mode
1041  // is when we are on a new label  (jbb May 08, 2007)
1042  switch (m = this_mode) {
1043    case NEWMV:
1044      this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
1045      thismvcost  = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
1046                                    102, xd->allow_high_precision_mv);
1047      if (mbmi->ref_frame[1] > 0) {
1048        this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
1049        thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
1050                                      mvjcost, mvcost, 102,
1051                                      xd->allow_high_precision_mv);
1052      }
1053      break;
1054    case NEARESTMV:
1055      this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
1056      if (mbmi->ref_frame[1] > 0)
1057        this_second_mv->as_int =
1058            frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
1059      break;
1060    case NEARMV:
1061      this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
1062      if (mbmi->ref_frame[1] > 0)
1063        this_second_mv->as_int =
1064            frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
1065      break;
1066    case ZEROMV:
1067      this_mv->as_int = 0;
1068      if (mbmi->ref_frame[1] > 0)
1069        this_second_mv->as_int = 0;
1070      break;
1071    default:
1072      break;
1073  }
1074
1075  cost = vp9_cost_mv_ref(cpi, this_mode,
1076                         mbmi->mb_mode_context[mbmi->ref_frame[0]]);
1077
1078  mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
1079  if (mbmi->ref_frame[1] > 0)
1080    mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
1081
1082  x->partition_info->bmi[i].mode = m;
1083  x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
1084  if (mbmi->ref_frame[1] > 0)
1085    x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int;
1086  for (idy = 0; idy < bh; ++idy) {
1087    for (idx = 0; idx < bw; ++idx) {
1088      vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1089                 &mic->bmi[i], sizeof(mic->bmi[i]));
1090      vpx_memcpy(&x->partition_info->bmi[i + idy * 2 + idx],
1091                 &x->partition_info->bmi[i],
1092                 sizeof(x->partition_info->bmi[i]));
1093    }
1094  }
1095
1096  cost += thismvcost;
1097  return cost;
1098}
1099
1100static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
1101                                       MACROBLOCK *x,
1102                                       int i,
1103                                       int *labelyrate,
1104                                       int *distortion,
1105                                       ENTROPY_CONTEXT *ta,
1106                                       ENTROPY_CONTEXT *tl) {
1107  int k;
1108  MACROBLOCKD *xd = &x->e_mbd;
1109  BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
1110  int bwl = b_width_log2(bsize), bw = 1 << bwl;
1111  int bhl = b_height_log2(bsize), bh = 1 << bhl;
1112  int idx, idy;
1113  const int src_stride = x->plane[0].src.stride;
1114  uint8_t* const src =
1115  raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1116                            x->plane[0].src.buf, src_stride);
1117  int16_t* src_diff =
1118  raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i,
1119                            x->plane[0].src_diff);
1120  int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i);
1121  uint8_t* const pre =
1122  raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1123                            xd->plane[0].pre[0].buf,
1124                            xd->plane[0].pre[0].stride);
1125  uint8_t* const dst =
1126  raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1127                            xd->plane[0].dst.buf,
1128                            xd->plane[0].dst.stride);
1129  int thisdistortion = 0;
1130  int thisrate = 0;
1131
1132  *labelyrate = 0;
1133  *distortion = 0;
1134
1135  vp9_build_inter_predictor(pre,
1136                            xd->plane[0].pre[0].stride,
1137                            dst,
1138                            xd->plane[0].dst.stride,
1139                            &xd->mode_info_context->bmi[i].as_mv[0],
1140                            &xd->scale_factor[0],
1141                            4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix);
1142
1143  // TODO(debargha): Make this work properly with the
1144  // implicit-compoundinter-weight experiment when implicit
1145  // weighting for splitmv modes is turned on.
1146  if (xd->mode_info_context->mbmi.ref_frame[1] > 0) {
1147    uint8_t* const second_pre =
1148    raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
1149                              xd->plane[0].pre[1].buf,
1150                              xd->plane[0].pre[1].stride);
1151    vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride,
1152                              dst, xd->plane[0].dst.stride,
1153                              &xd->mode_info_context->bmi[i].as_mv[1],
1154                              &xd->scale_factor[1], 4 * bw, 4 * bh, 1,
1155                              &xd->subpix);
1156  }
1157
1158  vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8,
1159                     src, src_stride,
1160                     dst, xd->plane[0].dst.stride);
1161
1162  k = i;
1163  for (idy = 0; idy < bh; ++idy) {
1164    for (idx = 0; idx < bw; ++idx) {
1165      k += (idy * 2 + idx);
1166      src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k,
1167                                           x->plane[0].src_diff);
1168      coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k);
1169      x->fwd_txm4x4(src_diff, coeff, 16);
1170      x->quantize_b_4x4(x, k, DCT_DCT, 16);
1171      thisdistortion += vp9_block_error(coeff,
1172                                        BLOCK_OFFSET(xd->plane[0].dqcoeff,
1173                                                     k, 16), 16);
1174      thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC,
1175                              ta + (k & 1),
1176                              tl + (k >> 1), TX_4X4, 16);
1177    }
1178  }
1179  *distortion += thisdistortion;
1180  *labelyrate += thisrate;
1181
1182  *distortion >>= 2;
1183  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
1184}
1185
1186typedef struct {
1187  int_mv *ref_mv, *second_ref_mv;
1188  int_mv mvp;
1189
1190  int64_t segment_rd;
1191  int r;
1192  int d;
1193  int segment_yrate;
1194  MB_PREDICTION_MODE modes[4];
1195  int_mv mvs[4], second_mvs[4];
1196  int eobs[4];
1197  int mvthresh;
1198} BEST_SEG_INFO;
1199
1200static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
1201  int r = 0;
1202  r |= (mv->as_mv.row >> 3) < x->mv_row_min;
1203  r |= (mv->as_mv.row >> 3) > x->mv_row_max;
1204  r |= (mv->as_mv.col >> 3) < x->mv_col_min;
1205  r |= (mv->as_mv.col >> 3) > x->mv_col_max;
1206  return r;
1207}
1208
1209static enum BlockSize get_block_size(int bw, int bh) {
1210  if (bw == 4 && bh == 4)
1211    return BLOCK_4X4;
1212
1213  if (bw == 4 && bh == 8)
1214    return BLOCK_4X8;
1215
1216  if (bw == 8 && bh == 4)
1217    return BLOCK_8X4;
1218
1219  if (bw == 8 && bh == 8)
1220    return BLOCK_8X8;
1221
1222  if (bw == 8 && bh == 16)
1223    return BLOCK_8X16;
1224
1225  if (bw == 16 && bh == 8)
1226    return BLOCK_16X8;
1227
1228  if (bw == 16 && bh == 16)
1229    return BLOCK_16X16;
1230
1231  if (bw == 32 && bh == 32)
1232    return BLOCK_32X32;
1233
1234  if (bw == 32 && bh == 16)
1235    return BLOCK_32X16;
1236
1237  if (bw == 16 && bh == 32)
1238    return BLOCK_16X32;
1239
1240  if (bw == 64 && bh == 32)
1241    return BLOCK_64X32;
1242
1243  if (bw == 32 && bh == 64)
1244    return BLOCK_32X64;
1245
1246  if (bw == 64 && bh == 64)
1247    return BLOCK_64X64;
1248
1249  assert(0);
1250  return -1;
1251}
1252
1253static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1254  MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
1255  x->plane[0].src.buf =
1256      raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
1257                                x->plane[0].src.buf,
1258                                x->plane[0].src.stride);
1259  assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0);
1260  x->e_mbd.plane[0].pre[0].buf =
1261      raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
1262                                x->e_mbd.plane[0].pre[0].buf,
1263                                x->e_mbd.plane[0].pre[0].stride);
1264  if (mbmi->ref_frame[1])
1265    x->e_mbd.plane[0].pre[1].buf =
1266        raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
1267                                  x->e_mbd.plane[0].pre[1].buf,
1268                                  x->e_mbd.plane[0].pre[1].stride);
1269}
1270
1271static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1272                                  struct buf_2d orig_pre[2]) {
1273  MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
1274  x->plane[0].src = orig_src;
1275  x->e_mbd.plane[0].pre[0] = orig_pre[0];
1276  if (mbmi->ref_frame[1])
1277    x->e_mbd.plane[0].pre[1] = orig_pre[1];
1278}
1279
1280static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
1281                                    BEST_SEG_INFO *bsi,
1282                                    int_mv seg_mvs[4][MAX_REF_FRAMES],
1283                                    int mi_row, int mi_col) {
1284  int i, j;
1285  int br = 0, bd = 0;
1286  MB_PREDICTION_MODE this_mode;
1287  MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
1288  const int label_count = 4;
1289  int64_t this_segment_rd = 0, other_segment_rd;
1290  int label_mv_thresh;
1291  int rate = 0;
1292  int sbr = 0, sbd = 0;
1293  int segmentyrate = 0;
1294  int best_eobs[4] = { 0 };
1295  BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
1296  int bwl = b_width_log2(bsize), bw = 1 << bwl;
1297  int bhl = b_height_log2(bsize), bh = 1 << bhl;
1298  int idx, idy;
1299  vp9_variance_fn_ptr_t *v_fn_ptr;
1300  ENTROPY_CONTEXT t_above[4], t_left[4];
1301  ENTROPY_CONTEXT t_above_b[4], t_left_b[4];
1302
1303  vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
1304  vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
1305
1306  v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)];
1307
1308  // 64 makes this threshold really big effectively
1309  // making it so that we very rarely check mvs on
1310  // segments.   setting this to 1 would make mv thresh
1311  // roughly equal to what it is for macroblocks
1312  label_mv_thresh = 1 * bsi->mvthresh / label_count;
1313
1314  // Segmentation method overheads
1315  other_segment_rd = this_segment_rd;
1316
1317  for (idy = 0; idy < 2; idy += bh) {
1318    for (idx = 0; idx < 2; idx += bw) {
1319      // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1320      // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
1321      int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
1322      int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1323      int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
1324      MB_PREDICTION_MODE mode_selected = ZEROMV;
1325      int bestlabelyrate = 0;
1326      i = idy * 2 + idx;
1327
1328      frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
1329      frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
1330      vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
1331                                    &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
1332                                    &frame_mv[NEARMV][mbmi->ref_frame[0]],
1333                                    i, 0);
1334      if (mbmi->ref_frame[1] > 0)
1335        vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
1336                                   &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
1337                                   &frame_mv[NEARMV][mbmi->ref_frame[1]],
1338                                   i, 1);
1339
1340      // search for the best motion vector on this segment
1341      for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1342        int64_t this_rd;
1343        int distortion;
1344        int labelyrate;
1345        ENTROPY_CONTEXT t_above_s[4], t_left_s[4];
1346        const struct buf_2d orig_src = x->plane[0].src;
1347        struct buf_2d orig_pre[2];
1348
1349        vpx_memcpy(orig_pre, x->e_mbd.plane[0].pre, sizeof(orig_pre));
1350
1351        vpx_memcpy(t_above_s, t_above, sizeof(t_above_s));
1352        vpx_memcpy(t_left_s, t_left, sizeof(t_left_s));
1353
1354        // motion search for newmv (single predictor case only)
1355        if (mbmi->ref_frame[1] <= 0 && this_mode == NEWMV) {
1356          int step_param = 0;
1357          int further_steps;
1358          int thissme, bestsme = INT_MAX;
1359          int sadpb = x->sadperbit4;
1360          int_mv mvp_full;
1361
1362          /* Is the best so far sufficiently good that we cant justify doing
1363           * and new motion search. */
1364          if (best_label_rd < label_mv_thresh)
1365            break;
1366
1367          if (cpi->compressor_speed) {
1368            // use previous block's result as next block's MV predictor.
1369            if (i > 0) {
1370              bsi->mvp.as_int =
1371              x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int;
1372              if (i == 2)
1373                bsi->mvp.as_int =
1374                x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int;
1375              step_param = 2;
1376            }
1377          }
1378
1379          further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1380
1381          mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
1382          mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
1383
1384          // adjust src pointer for this block
1385          mi_buf_shift(x, i);
1386          bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1387                                           sadpb, further_steps, 0, v_fn_ptr,
1388                                           bsi->ref_mv, &mode_mv[NEWMV]);
1389
1390          // Should we do a full search (best quality only)
1391          if (cpi->compressor_speed == 0) {
1392            /* Check if mvp_full is within the range. */
1393            clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
1394                     x->mv_row_min, x->mv_row_max);
1395
1396            thissme = cpi->full_search_sad(x, &mvp_full,
1397                                           sadpb, 16, v_fn_ptr,
1398                                           x->nmvjointcost, x->mvcost,
1399                                           bsi->ref_mv, i);
1400
1401            if (thissme < bestsme) {
1402              bestsme = thissme;
1403              mode_mv[NEWMV].as_int =
1404                  x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int;
1405            } else {
1406              /* The full search result is actually worse so re-instate the
1407               * previous best vector */
1408              x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int =
1409                  mode_mv[NEWMV].as_int;
1410            }
1411          }
1412
1413          if (bestsme < INT_MAX) {
1414            int distortion;
1415            unsigned int sse;
1416            cpi->find_fractional_mv_step(x, &mode_mv[NEWMV],
1417                                         bsi->ref_mv, x->errorperbit, v_fn_ptr,
1418                                         x->nmvjointcost, x->mvcost,
1419                                         &distortion, &sse);
1420
1421            // safe motion search result for use in compound prediction
1422            seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
1423          }
1424
1425          // restore src pointers
1426          mi_buf_restore(x, orig_src, orig_pre);
1427        } else if (mbmi->ref_frame[1] > 0 && this_mode == NEWMV) {
1428          if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
1429              seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
1430            continue;
1431
1432          // adjust src pointers
1433          mi_buf_shift(x, i);
1434          if (cpi->sf.comp_inter_joint_search_thresh < bsize) {
1435            int rate_mv;
1436            joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
1437                                mi_row, mi_col, seg_mvs[i],
1438                                &rate_mv);
1439            seg_mvs[i][mbmi->ref_frame[0]].as_int =
1440                frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
1441            seg_mvs[i][mbmi->ref_frame[1]].as_int =
1442                frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1443          }
1444          // restore src pointers
1445          mi_buf_restore(x, orig_src, orig_pre);
1446        }
1447
1448        rate = labels2mode(x, i, this_mode, &mode_mv[this_mode],
1449                           &second_mode_mv[this_mode], frame_mv, seg_mvs[i],
1450                           bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
1451                           x->mvcost, cpi);
1452
1453        // Trap vectors that reach beyond the UMV borders
1454        if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
1455            ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
1456            ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
1457            ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
1458          continue;
1459        }
1460        if (mbmi->ref_frame[1] > 0 &&
1461            mv_check_bounds(x, &second_mode_mv[this_mode]))
1462          continue;
1463
1464        this_rd = encode_inter_mb_segment(&cpi->common,
1465                                          x, i, &labelyrate,
1466                                          &distortion, t_above_s, t_left_s);
1467        this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
1468        rate += labelyrate;
1469
1470        if (this_rd < best_label_rd) {
1471          sbr = rate;
1472          sbd = distortion;
1473          bestlabelyrate = labelyrate;
1474          mode_selected = this_mode;
1475          best_label_rd = this_rd;
1476          best_eobs[i] = x->e_mbd.plane[0].eobs[i];
1477          vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s));
1478          vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s));
1479        }
1480      } /*for each 4x4 mode*/
1481
1482      vpx_memcpy(t_above, t_above_b, sizeof(t_above));
1483      vpx_memcpy(t_left, t_left_b, sizeof(t_left));
1484
1485      labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
1486                  &second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
1487                  bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
1488                  x->mvcost, cpi);
1489
1490      br += sbr;
1491      bd += sbd;
1492      segmentyrate += bestlabelyrate;
1493      this_segment_rd += best_label_rd;
1494      other_segment_rd += best_other_rd;
1495
1496      for (j = 1; j < bh; ++j)
1497        vpx_memcpy(&x->partition_info->bmi[i + j * 2],
1498                   &x->partition_info->bmi[i],
1499                   sizeof(x->partition_info->bmi[i]));
1500      for (j = 1; j < bw; ++j)
1501        vpx_memcpy(&x->partition_info->bmi[i + j],
1502                   &x->partition_info->bmi[i],
1503                   sizeof(x->partition_info->bmi[i]));
1504    }
1505  } /* for each label */
1506
1507  if (this_segment_rd < bsi->segment_rd) {
1508    bsi->r = br;
1509    bsi->d = bd;
1510    bsi->segment_yrate = segmentyrate;
1511    bsi->segment_rd = this_segment_rd;
1512
1513    // store everything needed to come back to this!!
1514    for (i = 0; i < 4; i++) {
1515      bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
1516      if (mbmi->ref_frame[1] > 0)
1517        bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv;
1518      bsi->modes[i] = x->partition_info->bmi[i].mode;
1519      bsi->eobs[i] = best_eobs[i];
1520    }
1521  }
1522}
1523
1524static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
1525                                       int_mv *best_ref_mv,
1526                                       int_mv *second_best_ref_mv,
1527                                       int64_t best_rd,
1528                                       int *returntotrate,
1529                                       int *returnyrate,
1530                                       int *returndistortion,
1531                                       int *skippable, int mvthresh,
1532                                       int_mv seg_mvs[4][MAX_REF_FRAMES],
1533                                       int mi_row, int mi_col) {
1534  int i;
1535  BEST_SEG_INFO bsi;
1536  MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
1537
1538  vpx_memset(&bsi, 0, sizeof(bsi));
1539
1540  bsi.segment_rd = best_rd;
1541  bsi.ref_mv = best_ref_mv;
1542  bsi.second_ref_mv = second_best_ref_mv;
1543  bsi.mvp.as_int = best_ref_mv->as_int;
1544  bsi.mvthresh = mvthresh;
1545
1546  for (i = 0; i < 4; i++)
1547    bsi.modes[i] = ZEROMV;
1548
1549  rd_check_segment_txsize(cpi, x, &bsi, seg_mvs, mi_row, mi_col);
1550
1551  /* set it to the best */
1552  for (i = 0; i < 4; i++) {
1553    x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int = bsi.mvs[i].as_int;
1554    if (mbmi->ref_frame[1] > 0)
1555      x->e_mbd.mode_info_context->bmi[i].as_mv[1].as_int =
1556      bsi.second_mvs[i].as_int;
1557    x->e_mbd.plane[0].eobs[i] = bsi.eobs[i];
1558  }
1559
1560  /* save partitions */
1561  x->partition_info->count = 4;
1562
1563  for (i = 0; i < x->partition_info->count; i++) {
1564    x->partition_info->bmi[i].mode = bsi.modes[i];
1565    x->partition_info->bmi[i].mv.as_mv = bsi.mvs[i].as_mv;
1566    if (mbmi->ref_frame[1] > 0)
1567      x->partition_info->bmi[i].second_mv.as_mv = bsi.second_mvs[i].as_mv;
1568  }
1569  /*
1570   * used to set mbmi->mv.as_int
1571   */
1572  x->partition_info->bmi[3].mv.as_int = bsi.mvs[3].as_int;
1573  if (mbmi->ref_frame[1] > 0)
1574    x->partition_info->bmi[3].second_mv.as_int = bsi.second_mvs[3].as_int;
1575
1576  *returntotrate = bsi.r;
1577  *returndistortion = bsi.d;
1578  *returnyrate = bsi.segment_yrate;
1579  *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8);
1580  mbmi->mode = bsi.modes[3];
1581
1582  return (int)(bsi.segment_rd);
1583}
1584
1585static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
1586                    uint8_t *ref_y_buffer, int ref_y_stride,
1587                    int ref_frame, enum BlockSize block_size ) {
1588  MACROBLOCKD *xd = &x->e_mbd;
1589  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
1590  int_mv this_mv;
1591  int i;
1592  int zero_seen = 0;
1593  int best_index = 0;
1594  int best_sad = INT_MAX;
1595  int this_sad = INT_MAX;
1596
1597  uint8_t *src_y_ptr = x->plane[0].src.buf;
1598  uint8_t *ref_y_ptr;
1599  int row_offset, col_offset;
1600
1601  // Get the sad for each candidate reference mv
1602  for (i = 0; i < MAX_MV_REF_CANDIDATES; i++) {
1603    this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int;
1604
1605    // The list is at an end if we see 0 for a second time.
1606    if (!this_mv.as_int && zero_seen)
1607      break;
1608    zero_seen = zero_seen || !this_mv.as_int;
1609
1610    row_offset = this_mv.as_mv.row >> 3;
1611    col_offset = this_mv.as_mv.col >> 3;
1612    ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
1613
1614    // Find sad for current vector.
1615    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
1616                                           ref_y_ptr, ref_y_stride,
1617                                           0x7fffffff);
1618
1619    // Note if it is the best so far.
1620    if (this_sad < best_sad) {
1621      best_sad = this_sad;
1622      best_index = i;
1623    }
1624  }
1625
1626  // Note the index of the mv that worked best in the reference list.
1627  x->mv_best_ref_index[ref_frame] = best_index;
1628}
1629
1630static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
1631                                     unsigned int *ref_costs_single,
1632                                     unsigned int *ref_costs_comp,
1633                                     vp9_prob *comp_mode_p) {
1634  VP9_COMMON *const cm = &cpi->common;
1635  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
1636  int seg_ref_active = vp9_segfeature_active(xd, segment_id,
1637                                             SEG_LVL_REF_FRAME);
1638  if (seg_ref_active) {
1639    vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
1640    vpx_memset(ref_costs_comp,   0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
1641    *comp_mode_p = 128;
1642  } else {
1643    vp9_prob intra_inter_p = vp9_get_pred_prob(cm, xd, PRED_INTRA_INTER);
1644    vp9_prob comp_inter_p = 128;
1645
1646    if (cm->comp_pred_mode == HYBRID_PREDICTION) {
1647      comp_inter_p = vp9_get_pred_prob(cm, xd, PRED_COMP_INTER_INTER);
1648      *comp_mode_p = comp_inter_p;
1649    } else {
1650      *comp_mode_p = 128;
1651    }
1652
1653    ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
1654
1655    if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) {
1656      vp9_prob ref_single_p1 = vp9_get_pred_prob(cm, xd, PRED_SINGLE_REF_P1);
1657      vp9_prob ref_single_p2 = vp9_get_pred_prob(cm, xd, PRED_SINGLE_REF_P2);
1658      unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
1659
1660      if (cm->comp_pred_mode == HYBRID_PREDICTION)
1661        base_cost += vp9_cost_bit(comp_inter_p, 0);
1662
1663      ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
1664          ref_costs_single[ALTREF_FRAME] = base_cost;
1665      ref_costs_single[LAST_FRAME]   += vp9_cost_bit(ref_single_p1, 0);
1666      ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
1667      ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
1668      ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
1669      ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
1670    } else {
1671      ref_costs_single[LAST_FRAME]   = 512;
1672      ref_costs_single[GOLDEN_FRAME] = 512;
1673      ref_costs_single[ALTREF_FRAME] = 512;
1674    }
1675    if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) {
1676      vp9_prob ref_comp_p = vp9_get_pred_prob(cm, xd, PRED_COMP_REF_P);
1677      unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
1678
1679      if (cm->comp_pred_mode == HYBRID_PREDICTION)
1680        base_cost += vp9_cost_bit(comp_inter_p, 1);
1681
1682      ref_costs_comp[LAST_FRAME]   = base_cost + vp9_cost_bit(ref_comp_p, 0);
1683      ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
1684    } else {
1685      ref_costs_comp[LAST_FRAME]   = 512;
1686      ref_costs_comp[GOLDEN_FRAME] = 512;
1687    }
1688  }
1689}
1690
1691static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
1692                                 int mode_index,
1693                                 PARTITION_INFO *partition,
1694                                 int_mv *ref_mv,
1695                                 int_mv *second_ref_mv,
1696                                 int64_t comp_pred_diff[NB_PREDICTION_TYPES],
1697                                 int64_t txfm_size_diff[NB_TXFM_MODES]) {
1698  MACROBLOCKD *const xd = &x->e_mbd;
1699
1700  // Take a snapshot of the coding context so it can be
1701  // restored if we decide to encode this way
1702  ctx->skip = x->skip;
1703  ctx->best_mode_index = mode_index;
1704  ctx->mic = *xd->mode_info_context;
1705
1706  if (partition)
1707    ctx->partition_info = *partition;
1708
1709  ctx->best_ref_mv.as_int = ref_mv->as_int;
1710  ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;
1711
1712  ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY];
1713  ctx->comp_pred_diff   = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
1714  ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
1715
1716  memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff));
1717}
1718
1719static void setup_pred_block(const MACROBLOCKD *xd,
1720                             struct buf_2d dst[MAX_MB_PLANE],
1721                             const YV12_BUFFER_CONFIG *src,
1722                             int mi_row, int mi_col,
1723                             const struct scale_factors *scale,
1724                             const struct scale_factors *scale_uv) {
1725  int i;
1726
1727  dst[0].buf = src->y_buffer;
1728  dst[0].stride = src->y_stride;
1729  dst[1].buf = src->u_buffer;
1730  dst[2].buf = src->v_buffer;
1731  dst[1].stride = dst[2].stride = src->uv_stride;
1732#if CONFIG_ALPHA
1733  dst[3].buf = src->alpha_buffer;
1734  dst[3].stride = src->alpha_stride;
1735#endif
1736
1737  // TODO(jkoleszar): Make scale factors per-plane data
1738  for (i = 0; i < MAX_MB_PLANE; i++) {
1739    setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
1740                     i ? scale_uv : scale,
1741                     xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
1742  }
1743}
1744
1745static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
1746                               int idx, MV_REFERENCE_FRAME frame_type,
1747                               enum BlockSize block_size,
1748                               int mi_row, int mi_col,
1749                               int_mv frame_nearest_mv[MAX_REF_FRAMES],
1750                               int_mv frame_near_mv[MAX_REF_FRAMES],
1751                               struct buf_2d yv12_mb[4][MAX_MB_PLANE],
1752                               struct scale_factors scale[MAX_REF_FRAMES]) {
1753  VP9_COMMON *cm = &cpi->common;
1754  YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
1755  MACROBLOCKD *const xd = &x->e_mbd;
1756  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
1757
1758  // set up scaling factors
1759  scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
1760
1761  scale[frame_type].x_offset_q4 =
1762      ROUND_POWER_OF_TWO(mi_col * MI_SIZE * scale[frame_type].x_scale_fp,
1763       VP9_REF_SCALE_SHIFT) & 0xf;
1764  scale[frame_type].y_offset_q4 =
1765      ROUND_POWER_OF_TWO(mi_row * MI_SIZE * scale[frame_type].y_scale_fp,
1766       VP9_REF_SCALE_SHIFT) & 0xf;
1767
1768  // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
1769  // use the UV scaling factors.
1770  setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col,
1771                   &scale[frame_type], &scale[frame_type]);
1772
1773  // Gets an initial list of candidate vectors from neighbours and orders them
1774  vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context,
1775                   xd->prev_mode_info_context,
1776                   frame_type,
1777                   mbmi->ref_mvs[frame_type],
1778                   cpi->common.ref_frame_sign_bias);
1779
1780  // Candidate refinement carried out at encoder and decoder
1781  vp9_find_best_ref_mvs(xd,
1782                        mbmi->ref_mvs[frame_type],
1783                        &frame_nearest_mv[frame_type],
1784                        &frame_near_mv[frame_type]);
1785
1786  // Further refinement that is encode side only to test the top few candidates
1787  // in full and choose the best as the centre point for subsequent searches.
1788  // The current implementation doesn't support scaling.
1789  if (scale[frame_type].x_scale_fp == (1 << VP9_REF_SCALE_SHIFT) &&
1790      scale[frame_type].y_scale_fp == (1 << VP9_REF_SCALE_SHIFT))
1791    mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
1792            frame_type, block_size);
1793}
1794
1795static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) {
1796  YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
1797  int fb = get_ref_frame_idx(cpi, ref_frame);
1798  if (cpi->scaled_ref_idx[fb] != cpi->common.ref_frame_map[fb])
1799    scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb]];
1800  return scaled_ref_frame;
1801}
1802
// Models the rate and distortion for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize. The closed
// form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
// The function is implemented as a piecewise approximation to the exact
// computation.
//
//   var   - sum-type variance of the block (divided by n below to get the
//           per-sample variance)
//   n     - number of samples in the block
//   qstep - effective quantizer step size
//   rate  - out: modelled rate, scaled by 256 per unit of R
//   dist  - out: modelled distortion
//
// Non-positive var or n yield rate = dist = 0. A non-positive qstep is
// treated as a step of 1: the visible caller passes `dequant >> 3`, which
// truncates to 0 for small dequant values, and a zero step would reach log(0)
// and then convert an infinite value to int (undefined behaviour).
//
// TODO(debargha): Implement the functions by interpolating from a
// look-up table
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int *dist) {
  vp9_clear_system_state();
  if (var <= 0 || n <= 0) {
    // Also covers negative inputs, which would otherwise feed sqrt() a
    // negative value and propagate NaN into the int conversions below.
    *rate = 0;
    *dist = 0;
  } else {
    double D, R;
    const int step = qstep > 0 ? qstep : 1;
    const double s2 = (double) var / n;
    const double s = sqrt(s2);
    // Step size relative to the source standard deviation; strictly positive
    // here, so log(x) below is finite.
    const double x = step / s;
    if (x > 1.0) {
      const double y = exp(-x / 2);
      const double y2 = y * y;
      D = 2.069981728764738 * y2 - 2.764286806516079 * y + 1.003956960819275;
      R = 0.924056758535089 * y2 + 2.738636469814024 * y - 0.005169662030017;
    } else {
      const double x2 = x * x;
      D = 0.075303187668830 * x2 + 0.004296954321112 * x - 0.000413209252807;
      if (x > 0.125)
        R = 1 / (-0.03459733614226 * x2 + 0.36561675733603 * x +
                 0.1626989668625);
      else
        R = -1.442252874826093 * log(x) + 1.944647760719664;
    }
    if (R < 0) {
      // The fit went negative: model the block as not coded at all.
      *rate = 0;
      *dist = var;
    } else {
      *rate = (int)(n * R * 256 + 0.5);
      *dist = (int)(n * D * s2 + 0.5);
    }
  }
  vp9_clear_system_state();
}
1848
1849static enum BlockSize get_plane_block_size(BLOCK_SIZE_TYPE bsize,
1850                                           struct macroblockd_plane *pd) {
1851  return get_block_size(plane_block_width(bsize, pd),
1852                        plane_block_height(bsize, pd));
1853}
1854
1855static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
1856                            MACROBLOCK *x, MACROBLOCKD *xd,
1857                            int *out_rate_sum, int *out_dist_sum) {
1858  // Note our transform coeffs are 8 times an orthogonal transform.
1859  // Hence quantizer step is also 8 times. To get effective quantizer
1860  // we need to divide by 8 before sending to modeling function.
1861  unsigned int sse, var;
1862  int i, rate_sum = 0, dist_sum = 0;
1863
1864  for (i = 0; i < MAX_MB_PLANE; ++i) {
1865    struct macroblock_plane *const p = &x->plane[i];
1866    struct macroblockd_plane *const pd = &xd->plane[i];
1867
1868    // TODO(dkovalev) the same code in get_plane_block_size
1869    const int bw = plane_block_width(bsize, pd);
1870    const int bh = plane_block_height(bsize, pd);
1871    const enum BlockSize bs = get_block_size(bw, bh);
1872    int rate, dist;
1873    var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
1874                             pd->dst.buf, pd->dst.stride, &sse);
1875    model_rd_from_var_lapndz(var, bw * bh, pd->dequant[1] >> 3, &rate, &dist);
1876
1877    rate_sum += rate;
1878    dist_sum += dist;
1879  }
1880
1881  *out_rate_sum = rate_sum;
1882  *out_dist_sum = dist_sum;
1883}
1884
1885static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) {
1886  MACROBLOCKD *xd = &x->e_mbd;
1887  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
1888
1889  const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
1890  const int m = vp9_switchable_interp_map[mbmi->interp_filter];
1891  return SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
1892}
1893
1894static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1895                                 BLOCK_SIZE_TYPE bsize,
1896                                 int mi_row, int mi_col,
1897                                 int_mv *tmp_mv, int *rate_mv) {
1898  MACROBLOCKD *xd = &x->e_mbd;
1899  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
1900  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
1901  int bestsme = INT_MAX;
1902  int further_steps, step_param = cpi->sf.first_step;
1903  int sadpb = x->sadperbit16;
1904  int_mv mvp_full;
1905  int ref = mbmi->ref_frame[0];
1906  int_mv ref_mv = mbmi->ref_mvs[ref][0];
1907  int sr = 0;
1908  const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
1909
1910  int tmp_col_min = x->mv_col_min;
1911  int tmp_col_max = x->mv_col_max;
1912  int tmp_row_min = x->mv_row_min;
1913  int tmp_row_max = x->mv_row_max;
1914
1915  YV12_BUFFER_CONFIG *scaled_ref_frame = get_scaled_ref_frame(cpi, ref);
1916
1917  if (scaled_ref_frame) {
1918    int i;
1919    // Swap out the reference frame for a version that's been scaled to
1920    // match the resolution of the current frame, allowing the existing
1921    // motion search code to be used without additional modifications.
1922    for (i = 0; i < MAX_MB_PLANE; i++)
1923      backup_yv12[i] = xd->plane[i].pre[0];
1924
1925    setup_pre_planes(xd, scaled_ref_frame, NULL, mi_row, mi_col,
1926                     NULL, NULL);
1927  }
1928
1929  vp9_clamp_mv_min_max(x, &ref_mv);
1930
1931  sr = vp9_init_search_range(cpi->common.width, cpi->common.height);
1932
1933  // mvp_full.as_int = ref_mv[0].as_int;
1934  mvp_full.as_int =
1935      mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
1936
1937  mvp_full.as_mv.col >>= 3;
1938  mvp_full.as_mv.row >>= 3;
1939
1940  // adjust search range according to sr from mv prediction
1941  step_param = MAX(step_param, sr);
1942
1943  // Further step/diamond searches as necessary
1944  further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
1945
1946  bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1947                                   sadpb, further_steps, 1,
1948                                   &cpi->fn_ptr[block_size],
1949                                   &ref_mv, tmp_mv);
1950
1951  x->mv_col_min = tmp_col_min;
1952  x->mv_col_max = tmp_col_max;
1953  x->mv_row_min = tmp_row_min;
1954  x->mv_row_max = tmp_row_max;
1955
1956  if (bestsme < INT_MAX) {
1957    int dis; /* TODO: use dis in distortion calculation later. */
1958    unsigned int sse;
1959    cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv,
1960                                 x->errorperbit,
1961                                 &cpi->fn_ptr[block_size],
1962                                 x->nmvjointcost, x->mvcost,
1963                                 &dis, &sse);
1964  }
1965  *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv,
1966                             x->nmvjointcost, x->mvcost,
1967                             96, xd->allow_high_precision_mv);
1968  if (scaled_ref_frame) {
1969    int i;
1970    for (i = 0; i < MAX_MB_PLANE; i++)
1971      xd->plane[i].pre[0] = backup_yv12[i];
1972  }
1973}
1974
1975static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1976                                BLOCK_SIZE_TYPE bsize,
1977                                int_mv *frame_mv,
1978                                int mi_row, int mi_col,
1979                                int_mv single_newmv[MAX_REF_FRAMES],
1980                                int *rate_mv) {
1981  int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
1982  MACROBLOCKD *xd = &x->e_mbd;
1983  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
1984  int refs[2] = { mbmi->ref_frame[0],
1985    (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
1986  int_mv ref_mv[2];
1987  const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
1988  int ite;
1989  // Prediction buffer from second frame.
1990  uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
1991
1992  // Do joint motion search in compound mode to get more accurate mv.
1993  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
1994  struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}};
1995  struct buf_2d scaled_first_yv12;
1996  int last_besterr[2] = {INT_MAX, INT_MAX};
1997  YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
1998  scaled_ref_frame[0] = get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
1999  scaled_ref_frame[1] = get_scaled_ref_frame(cpi, mbmi->ref_frame[1]);
2000
2001  ref_mv[0] = mbmi->ref_mvs[refs[0]][0];
2002  ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
2003
2004  if (scaled_ref_frame[0]) {
2005    int i;
2006    // Swap out the reference frame for a version that's been scaled to
2007    // match the resolution of the current frame, allowing the existing
2008    // motion search code to be used without additional modifications.
2009    for (i = 0; i < MAX_MB_PLANE; i++)
2010      backup_yv12[i] = xd->plane[i].pre[0];
2011    setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col,
2012                     NULL, NULL);
2013  }
2014
2015  if (scaled_ref_frame[1]) {
2016    int i;
2017    for (i = 0; i < MAX_MB_PLANE; i++)
2018      backup_second_yv12[i] = xd->plane[i].pre[1];
2019
2020    setup_pre_planes(xd, scaled_ref_frame[1], NULL, mi_row, mi_col,
2021                     NULL, NULL);
2022  }
2023
2024  xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0],
2025                                         mi_row, mi_col);
2026  xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1],
2027                                         mi_row, mi_col);
2028  scaled_first_yv12 = xd->plane[0].pre[0];
2029
2030  // Initialize mv using single prediction mode result.
2031  frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2032  frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2033
2034  // Allow joint search multiple times iteratively for each ref frame
2035  // and break out the search loop if it couldn't find better mv.
2036  for (ite = 0; ite < 4; ite++) {
2037    struct buf_2d ref_yv12[2];
2038    int bestsme = INT_MAX;
2039    int sadpb = x->sadperbit16;
2040    int_mv tmp_mv;
2041    int search_range = 3;
2042
2043    int tmp_col_min = x->mv_col_min;
2044    int tmp_col_max = x->mv_col_max;
2045    int tmp_row_min = x->mv_row_min;
2046    int tmp_row_max = x->mv_row_max;
2047    int id = ite % 2;
2048
2049    // Initialized here because of compiler problem in Visual Studio.
2050    ref_yv12[0] = xd->plane[0].pre[0];
2051    ref_yv12[1] = xd->plane[0].pre[1];
2052
2053    // Get pred block from second frame.
2054    vp9_build_inter_predictor(ref_yv12[!id].buf,
2055                              ref_yv12[!id].stride,
2056                              second_pred, pw,
2057                              &frame_mv[refs[!id]],
2058                              &xd->scale_factor[!id],
2059                              pw, ph, 0,
2060                              &xd->subpix);
2061
2062    // Compound motion search on first ref frame.
2063    if (id)
2064      xd->plane[0].pre[0] = ref_yv12[id];
2065    vp9_clamp_mv_min_max(x, &ref_mv[id]);
2066
2067    // Use mv result from single mode as mvp.
2068    tmp_mv.as_int = frame_mv[refs[id]].as_int;
2069
2070    tmp_mv.as_mv.col >>= 3;
2071    tmp_mv.as_mv.row >>= 3;
2072
2073    // Small-range full-pixel motion search
2074    bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
2075                                       search_range,
2076                                       &cpi->fn_ptr[block_size],
2077                                       x->nmvjointcost, x->mvcost,
2078                                       &ref_mv[id], second_pred,
2079                                       pw, ph);
2080
2081    x->mv_col_min = tmp_col_min;
2082    x->mv_col_max = tmp_col_max;
2083    x->mv_row_min = tmp_row_min;
2084    x->mv_row_max = tmp_row_max;
2085
2086    if (bestsme < INT_MAX) {
2087      int dis; /* TODO: use dis in distortion calculation later. */
2088      unsigned int sse;
2089
2090      bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv,
2091                                             &ref_mv[id],
2092                                             x->errorperbit,
2093                                             &cpi->fn_ptr[block_size],
2094                                             x->nmvjointcost, x->mvcost,
2095                                             &dis, &sse, second_pred,
2096                                             pw, ph);
2097    }
2098
2099    if (id)
2100      xd->plane[0].pre[0] = scaled_first_yv12;
2101
2102    if (bestsme < last_besterr[id]) {
2103      frame_mv[refs[id]].as_int = tmp_mv.as_int;
2104      last_besterr[id] = bestsme;
2105    } else {
2106      break;
2107    }
2108  }
2109
2110  // restore the predictor
2111  if (scaled_ref_frame[0]) {
2112    int i;
2113    for (i = 0; i < MAX_MB_PLANE; i++)
2114      xd->plane[i].pre[0] = backup_yv12[i];
2115  }
2116
2117  if (scaled_ref_frame[1]) {
2118    int i;
2119    for (i = 0; i < MAX_MB_PLANE; i++)
2120      xd->plane[i].pre[1] = backup_second_yv12[i];
2121  }
2122  *rate_mv  = vp9_mv_bit_cost(&frame_mv[refs[0]],
2123                              &mbmi->ref_mvs[refs[0]][0],
2124                              x->nmvjointcost, x->mvcost, 96,
2125                              x->e_mbd.allow_high_precision_mv);
2126  *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
2127                              &mbmi->ref_mvs[refs[1]][0],
2128                              x->nmvjointcost, x->mvcost, 96,
2129                              x->e_mbd.allow_high_precision_mv);
2130
2131  vpx_free(second_pred);
2132}
2133
2134static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2135                                 BLOCK_SIZE_TYPE bsize,
2136                                 int64_t txfm_cache[],
2137                                 int *rate2, int *distortion, int *skippable,
2138                                 int *rate_y, int *distortion_y,
2139                                 int *rate_uv, int *distortion_uv,
2140                                 int *mode_excluded, int *disable_skip,
2141                                 INTERPOLATIONFILTERTYPE *best_filter,
2142                                 int_mv *frame_mv,
2143                                 int mi_row, int mi_col,
2144                                 int_mv single_newmv[MAX_REF_FRAMES]) {
2145  const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize);
2146
2147  VP9_COMMON *cm = &cpi->common;
2148  MACROBLOCKD *xd = &x->e_mbd;
2149  const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
2150  const enum BlockSize uv_block_size = get_plane_block_size(bsize,
2151                                                            &xd->plane[1]);
2152  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2153  const int is_comp_pred = (mbmi->ref_frame[1] > 0);
2154  const int num_refs = is_comp_pred ? 2 : 1;
2155  const int this_mode = mbmi->mode;
2156  int i;
2157  int refs[2] = { mbmi->ref_frame[0],
2158    (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2159  int_mv cur_mv[2];
2160  int64_t this_rd = 0;
2161  unsigned char tmp_buf[MAX_MB_PLANE][64 * 64];
2162  int pred_exists = 0;
2163  int interpolating_intpel_seen = 0;
2164  int intpel_mv;
2165  int64_t rd, best_rd = INT64_MAX;
2166
2167  switch (this_mode) {
2168    int rate_mv;
2169    case NEWMV:
2170      if (is_comp_pred) {
2171        // Initialize mv using single prediction mode result.
2172        frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2173        frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2174
2175        if (cpi->sf.comp_inter_joint_search_thresh < bsize) {
2176          joint_motion_search(cpi, x, bsize, frame_mv,
2177                              mi_row, mi_col, single_newmv, &rate_mv);
2178        } else {
2179          rate_mv  = vp9_mv_bit_cost(&frame_mv[refs[0]],
2180                                     &mbmi->ref_mvs[refs[0]][0],
2181                                     x->nmvjointcost, x->mvcost, 96,
2182                                     x->e_mbd.allow_high_precision_mv);
2183          rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
2184                                     &mbmi->ref_mvs[refs[1]][0],
2185                                     x->nmvjointcost, x->mvcost, 96,
2186                                     x->e_mbd.allow_high_precision_mv);
2187        }
2188        if (frame_mv[refs[0]].as_int == INVALID_MV ||
2189            frame_mv[refs[1]].as_int == INVALID_MV)
2190          return INT64_MAX;
2191        *rate2 += rate_mv;
2192
2193      } else {
2194        int_mv tmp_mv;
2195        single_motion_search(cpi, x, bsize, mi_row, mi_col,
2196                             &tmp_mv, &rate_mv);
2197        *rate2 += rate_mv;
2198        frame_mv[refs[0]].as_int =
2199            xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2200        single_newmv[refs[0]].as_int = tmp_mv.as_int;
2201      }
2202      break;
2203    case NEARMV:
2204    case NEARESTMV:
2205    case ZEROMV:
2206    default:
2207      break;
2208  }
2209  for (i = 0; i < num_refs; ++i) {
2210    cur_mv[i] = frame_mv[refs[i]];
2211    // Clip "next_nearest" so that it does not extend to far out of image
2212    if (this_mode == NEWMV)
2213      assert(!clamp_mv2(&cur_mv[i], xd));
2214    else
2215      clamp_mv2(&cur_mv[i], xd);
2216
2217    if (mv_check_bounds(x, &cur_mv[i]))
2218      return INT64_MAX;
2219    mbmi->mv[i].as_int = cur_mv[i].as_int;
2220  }
2221
2222  /* We don't include the cost of the second reference here, because there
2223   * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2224   * words if you present them in that order, the second one is always known
2225   * if the first is known */
2226  *rate2 += vp9_cost_mv_ref(cpi, this_mode,
2227                            mbmi->mb_mode_context[mbmi->ref_frame[0]]);
2228
2229  pred_exists = 0;
2230  interpolating_intpel_seen = 0;
2231  // Are all MVs integer pel for Y and UV
2232  intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
2233      (mbmi->mv[0].as_mv.col & 15) == 0;
2234  if (is_comp_pred)
2235    intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
2236        (mbmi->mv[1].as_mv.col & 15) == 0;
2237  // Search for best switchable filter by checking the variance of
2238  // pred error irrespective of whether the filter will be used
2239  if (cpi->speed > 4) {
2240    *best_filter = EIGHTTAP;
2241  } else {
2242    int i, newbest;
2243    int tmp_rate_sum = 0, tmp_dist_sum = 0;
2244    for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
2245      int rs = 0;
2246      const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
2247      const int is_intpel_interp = intpel_mv &&
2248          vp9_is_interpolating_filter[filter];
2249      mbmi->interp_filter = filter;
2250      vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2251
2252      if (cm->mcomp_filter_type == SWITCHABLE)
2253        rs = get_switchable_rate(cm, x);
2254
2255      if (interpolating_intpel_seen && is_intpel_interp) {
2256        rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_sum, tmp_dist_sum);
2257      } else {
2258        int rate_sum = 0, dist_sum = 0;
2259        vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2260        model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2261        rd = RDCOST(x->rdmult, x->rddiv, rs + rate_sum, dist_sum);
2262        if (!interpolating_intpel_seen && is_intpel_interp) {
2263          tmp_rate_sum = rate_sum;
2264          tmp_dist_sum = dist_sum;
2265        }
2266      }
2267      newbest = i == 0 || rd < best_rd;
2268
2269      if (newbest) {
2270        best_rd = rd;
2271        *best_filter = mbmi->interp_filter;
2272      }
2273
2274      if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
2275          (cm->mcomp_filter_type != SWITCHABLE &&
2276           cm->mcomp_filter_type == mbmi->interp_filter)) {
2277        int p;
2278
2279        for (p = 0; p < MAX_MB_PLANE; p++) {
2280          const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y;
2281          const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x;
2282          int i;
2283
2284          for (i = 0; i < y; i++)
2285            vpx_memcpy(&tmp_buf[p][64 * i],
2286                       xd->plane[p].dst.buf + i * xd->plane[p].dst.stride, x);
2287        }
2288        pred_exists = 1;
2289      }
2290      interpolating_intpel_seen |= is_intpel_interp;
2291    }
2292  }
2293
2294  // Set the appripriate filter
2295  mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
2296      cm->mcomp_filter_type : *best_filter;
2297  vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2298
2299
2300  if (pred_exists) {
2301    int p;
2302
2303    for (p = 0; p < MAX_MB_PLANE; p++) {
2304      const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y;
2305      const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x;
2306      int i;
2307
2308      for (i = 0; i < y; i++)
2309        vpx_memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
2310                   &tmp_buf[p][64 * i], x);
2311    }
2312  } else {
2313    // Handles the special case when a filter that is not in the
2314    // switchable list (ex. bilinear, 6-tap) is indicated at the frame level
2315    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2316  }
2317
2318  if (cpi->common.mcomp_filter_type == SWITCHABLE)
2319    *rate2 += get_switchable_rate(cm, x);
2320
2321  if (cpi->active_map_enabled && x->active_ptr[0] == 0)
2322    x->skip = 1;
2323  else if (x->encode_breakout) {
2324    unsigned int var, sse;
2325    int threshold = (xd->plane[0].dequant[1]
2326                     * xd->plane[0].dequant[1] >> 4);
2327
2328    if (threshold < x->encode_breakout)
2329      threshold = x->encode_breakout;
2330
2331    var = cpi->fn_ptr[block_size].vf(x->plane[0].src.buf,
2332                                     x->plane[0].src.stride,
2333                                     xd->plane[0].dst.buf,
2334                                     xd->plane[0].dst.stride,
2335                                     &sse);
2336
2337    if ((int)sse < threshold) {
2338      unsigned int q2dc = xd->plane[0].dequant[0];
2339      /* If there is no codeable 2nd order dc
2340         or a very small uniform pixel change change */
2341      if ((sse - var < q2dc * q2dc >> 4) ||
2342          (sse / 2 > var && sse - var < 64)) {
2343        // Check u and v to make sure skip is ok
2344        int sse2;
2345        unsigned int sse2u, sse2v;
2346        var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf,
2347                                            x->plane[1].src.stride,
2348                                            xd->plane[1].dst.buf,
2349                                            xd->plane[1].dst.stride, &sse2u);
2350        var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf,
2351                                            x->plane[1].src.stride,
2352                                            xd->plane[2].dst.buf,
2353                                            xd->plane[1].dst.stride, &sse2v);
2354        sse2 = sse2u + sse2v;
2355
2356        if (sse2 * 2 < threshold) {
2357          x->skip = 1;
2358          *distortion = sse + sse2;
2359          *rate2 = 500;
2360
2361          /* for best_yrd calculation */
2362          *rate_uv = 0;
2363          *distortion_uv = sse2;
2364
2365          *disable_skip = 1;
2366          this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2367        }
2368      }
2369    }
2370  }
2371
2372  if (!x->skip) {
2373    int skippable_y, skippable_uv;
2374
2375    // Y cost and distortion
2376    super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y,
2377                    bsize, txfm_cache);
2378
2379    *rate2 += *rate_y;
2380    *distortion += *distortion_y;
2381
2382    super_block_uvrd(cm, x, rate_uv, distortion_uv,
2383                     &skippable_uv, bsize);
2384
2385    *rate2 += *rate_uv;
2386    *distortion += *distortion_uv;
2387    *skippable = skippable_y && skippable_uv;
2388  }
2389
2390  if (!(*mode_excluded)) {
2391    if (is_comp_pred) {
2392      *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
2393    } else {
2394      *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
2395    }
2396  }
2397
2398  return this_rd;  // if 0, this will be re-calculated by caller
2399}
2400
2401void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2402                               int *returnrate, int *returndist,
2403                               BLOCK_SIZE_TYPE bsize,
2404                               PICK_MODE_CONTEXT *ctx) {
2405  VP9_COMMON *cm = &cpi->common;
2406  MACROBLOCKD *xd = &x->e_mbd;
2407  int rate_y = 0, rate_uv;
2408  int rate_y_tokenonly = 0, rate_uv_tokenonly;
2409  int dist_y = 0, dist_uv;
2410  int y_skip = 0, uv_skip;
2411  int64_t txfm_cache[NB_TXFM_MODES], err;
2412  MB_PREDICTION_MODE mode;
2413  TX_SIZE txfm_size;
2414  int rate4x4_y, rate4x4_y_tokenonly, dist4x4_y;
2415  int64_t err4x4 = INT64_MAX;
2416  int i;
2417
2418  vpx_memset(&txfm_cache,0,sizeof(txfm_cache));
2419  ctx->skip = 0;
2420  xd->mode_info_context->mbmi.mode = DC_PRED;
2421  xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME;
2422  err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2423                               &dist_y, &y_skip, bsize, txfm_cache);
2424  mode = xd->mode_info_context->mbmi.mode;
2425  txfm_size = xd->mode_info_context->mbmi.txfm_size;
2426  rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
2427                          &dist_uv, &uv_skip,
2428                          (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 :
2429                                                       bsize);
2430  if (bsize < BLOCK_SIZE_SB8X8)
2431    err4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4_y,
2432                                       &rate4x4_y_tokenonly,
2433                                       &dist4x4_y, err);
2434
2435  if (y_skip && uv_skip) {
2436    *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
2437                  vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
2438    *returndist = dist_y + (dist_uv >> 2);
2439    memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff));
2440    xd->mode_info_context->mbmi.mode = mode;
2441    xd->mode_info_context->mbmi.txfm_size = txfm_size;
2442  } else if (bsize < BLOCK_SIZE_SB8X8 && err4x4 < err) {
2443    *returnrate = rate4x4_y + rate_uv +
2444        vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
2445    *returndist = dist4x4_y + (dist_uv >> 2);
2446    vpx_memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff));
2447    xd->mode_info_context->mbmi.txfm_size = TX_4X4;
2448  } else {
2449    *returnrate = rate_y + rate_uv +
2450        vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
2451    *returndist = dist_y + (dist_uv >> 2);
2452    for (i = 0; i < NB_TXFM_MODES; i++) {
2453      ctx->txfm_rd_diff[i] = txfm_cache[i] - txfm_cache[cm->txfm_mode];
2454    }
2455    xd->mode_info_context->mbmi.txfm_size = txfm_size;
2456    xd->mode_info_context->mbmi.mode = mode;
2457  }
2458
2459  ctx->mic = *xd->mode_info_context;
2460}
2461
2462int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2463                                  int mi_row, int mi_col,
2464                                  int *returnrate,
2465                                  int *returndistortion,
2466                                  BLOCK_SIZE_TYPE bsize,
2467                                  PICK_MODE_CONTEXT *ctx) {
2468  VP9_COMMON *cm = &cpi->common;
2469  MACROBLOCKD *xd = &x->e_mbd;
2470  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2471  const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
2472  MB_PREDICTION_MODE this_mode;
2473  MB_PREDICTION_MODE best_mode = DC_PRED;
2474  MV_REFERENCE_FRAME ref_frame;
2475  unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;
2476  int comp_pred, i;
2477  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
2478  struct buf_2d yv12_mb[4][MAX_MB_PLANE];
2479  int_mv single_newmv[MAX_REF_FRAMES];
2480  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
2481                                    VP9_ALT_FLAG };
2482  int idx_list[4] = {0,
2483                     cpi->lst_fb_idx,
2484                     cpi->gld_fb_idx,
2485                     cpi->alt_fb_idx};
2486  int64_t best_rd = INT64_MAX;
2487  int64_t best_txfm_rd[NB_TXFM_MODES];
2488  int64_t best_txfm_diff[NB_TXFM_MODES];
2489  int64_t best_pred_diff[NB_PREDICTION_TYPES];
2490  int64_t best_pred_rd[NB_PREDICTION_TYPES];
2491  MB_MODE_INFO best_mbmode;
2492  int j;
2493  int mode_index, best_mode_index = 0;
2494  unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
2495  vp9_prob comp_mode_p;
2496  int64_t best_overall_rd = INT64_MAX;
2497  INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
2498  INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
2499  int rate_uv_intra[TX_SIZE_MAX_SB], rate_uv_tokenonly[TX_SIZE_MAX_SB];
2500  int dist_uv[TX_SIZE_MAX_SB], skip_uv[TX_SIZE_MAX_SB];
2501  MB_PREDICTION_MODE mode_uv[TX_SIZE_MAX_SB];
2502  struct scale_factors scale_factor[4];
2503  unsigned int ref_frame_mask = 0;
2504  unsigned int mode_mask = 0;
2505  int64_t mode_distortions[MB_MODE_COUNT] = {-1};
2506  int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
2507  int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
2508                                             cpi->common.y_dc_delta_q);
2509  int_mv seg_mvs[4][MAX_REF_FRAMES];
2510  union b_mode_info best_bmodes[4];
2511  PARTITION_INFO best_partition;
2512  int bwsl = b_width_log2(bsize);
2513  int bws = (1 << bwsl) / 4;  // mode_info step for subsize
2514  int bhsl = b_height_log2(bsize);
2515  int bhs = (1 << bhsl) / 4;  // mode_info step for subsize
2516
2517  for (i = 0; i < 4; i++) {
2518    int j;
2519
2520    for (j = 0; j < MAX_REF_FRAMES; j++)
2521      seg_mvs[i][j].as_int = INVALID_MV;
2522  }
2523  // Everywhere the flag is set the error is much higher than its neighbors.
2524  ctx->frames_with_high_error = 0;
2525  ctx->modes_with_high_error = 0;
2526
2527  xd->mode_info_context->mbmi.segment_id = segment_id;
2528  estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
2529                           &comp_mode_p);
2530  vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
2531  vpx_memset(&single_newmv, 0, sizeof(single_newmv));
2532
2533  for (i = 0; i < NB_PREDICTION_TYPES; ++i)
2534    best_pred_rd[i] = INT64_MAX;
2535  for (i = 0; i < NB_TXFM_MODES; i++)
2536    best_txfm_rd[i] = INT64_MAX;
2537
2538  // Create a mask set to 1 for each frame used by a smaller resolution.
2539  if (cpi->speed > 0) {
2540    switch (block_size) {
2541      case BLOCK_64X64:
2542        for (i = 0; i < 4; i++) {
2543          for (j = 0; j < 4; j++) {
2544            ref_frame_mask |= x->mb_context[i][j].frames_with_high_error;
2545            mode_mask |= x->mb_context[i][j].modes_with_high_error;
2546          }
2547        }
2548        for (i = 0; i < 4; i++) {
2549          ref_frame_mask |= x->sb32_context[i].frames_with_high_error;
2550          mode_mask |= x->sb32_context[i].modes_with_high_error;
2551        }
2552        break;
2553      case BLOCK_32X32:
2554        for (i = 0; i < 4; i++) {
2555          ref_frame_mask |=
2556              x->mb_context[xd->sb_index][i].frames_with_high_error;
2557          mode_mask |= x->mb_context[xd->sb_index][i].modes_with_high_error;
2558        }
2559        break;
2560      default:
2561        // Until we handle all block sizes set it to present;
2562        ref_frame_mask = 0;
2563        mode_mask = 0;
2564        break;
2565    }
2566    ref_frame_mask = ~ref_frame_mask;
2567    mode_mask = ~mode_mask;
2568  }
2569
2570  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
2571    if (cpi->ref_frame_flags & flag_list[ref_frame]) {
2572      setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size,
2573                         mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV],
2574                         yv12_mb, scale_factor);
2575    }
2576    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
2577    frame_mv[ZEROMV][ref_frame].as_int = 0;
2578  }
2579  if (cpi->speed == 0
2580      || (cpi->speed > 0 && (ref_frame_mask & (1 << INTRA_FRAME)))) {
2581    mbmi->mode = DC_PRED;
2582    mbmi->ref_frame[0] = INTRA_FRAME;
2583    for (i = 0; i <= (bsize < BLOCK_SIZE_MB16X16 ? TX_4X4 :
2584                      (bsize < BLOCK_SIZE_SB32X32 ? TX_8X8 :
2585                       (bsize < BLOCK_SIZE_SB64X64 ? TX_16X16 : TX_32X32)));
2586         i++) {
2587      mbmi->txfm_size = i;
2588      rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[i], &rate_uv_tokenonly[i],
2589                              &dist_uv[i], &skip_uv[i],
2590                              (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 :
2591                                                           bsize);
2592      mode_uv[i] = mbmi->uv_mode;
2593    }
2594  }
2595
2596  for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
2597    int mode_excluded = 0;
2598    int64_t this_rd = INT64_MAX;
2599    int disable_skip = 0;
2600    int compmode_cost = 0;
2601    int rate2 = 0, rate_y = 0, rate_uv = 0;
2602    int distortion2 = 0, distortion_y = 0, distortion_uv = 0;
2603    int skippable;
2604    int64_t txfm_cache[NB_TXFM_MODES];
2605    int i;
2606
2607    for (i = 0; i < NB_TXFM_MODES; ++i)
2608      txfm_cache[i] = INT64_MAX;
2609
2610    // Test best rd so far against threshold for trying this mode.
2611    if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] *
2612                     cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 4)) ||
2613        cpi->rd_threshes[bsize][mode_index] == INT_MAX)
2614      continue;
2615
2616    // Do not allow compound prediction if the segment level reference
2617    // frame feature is in use as in this case there can only be one reference.
2618    if ((vp9_mode_order[mode_index].second_ref_frame > INTRA_FRAME) &&
2619         vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME))
2620      continue;
2621
2622    x->skip = 0;
2623    this_mode = vp9_mode_order[mode_index].mode;
2624    ref_frame = vp9_mode_order[mode_index].ref_frame;
2625
2626    if (cpi->speed > 0 && bsize >= BLOCK_SIZE_SB8X8) {
2627      if (!(ref_frame_mask & (1 << ref_frame))) {
2628        continue;
2629      }
2630      if (!(mode_mask & (1 << this_mode))) {
2631        continue;
2632      }
2633      if (vp9_mode_order[mode_index].second_ref_frame != NONE
2634          && !(ref_frame_mask
2635              & (1 << vp9_mode_order[mode_index].second_ref_frame))) {
2636        continue;
2637      }
2638    }
2639
2640    mbmi->ref_frame[0] = ref_frame;
2641    mbmi->ref_frame[1] = vp9_mode_order[mode_index].second_ref_frame;
2642
2643    if (!(ref_frame == INTRA_FRAME
2644        || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
2645      continue;
2646    }
2647    if (!(mbmi->ref_frame[1] == NONE
2648        || (cpi->ref_frame_flags & flag_list[mbmi->ref_frame[1]]))) {
2649      continue;
2650    }
2651
2652    // TODO(jingning, jkoleszar): scaling reference frame not supported for
2653    // SPLITMV.
2654    if (mbmi->ref_frame[0] > 0 &&
2655          (scale_factor[mbmi->ref_frame[0]].x_scale_fp !=
2656           (1 << VP9_REF_SCALE_SHIFT) ||
2657           scale_factor[mbmi->ref_frame[0]].y_scale_fp !=
2658           (1 << VP9_REF_SCALE_SHIFT)) &&
2659        this_mode == SPLITMV)
2660      continue;
2661
2662    if (mbmi->ref_frame[1] > 0 &&
2663          (scale_factor[mbmi->ref_frame[1]].x_scale_fp !=
2664           (1 << VP9_REF_SCALE_SHIFT) ||
2665           scale_factor[mbmi->ref_frame[1]].y_scale_fp !=
2666           (1 << VP9_REF_SCALE_SHIFT)) &&
2667        this_mode == SPLITMV)
2668      continue;
2669
2670    set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
2671                      scale_factor);
2672    comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
2673    mbmi->mode = this_mode;
2674    mbmi->uv_mode = DC_PRED;
2675
2676    // Evaluate all sub-pel filters irrespective of whether we can use
2677    // them for this frame.
2678    mbmi->interp_filter = cm->mcomp_filter_type;
2679    vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
2680
2681    if (bsize >= BLOCK_SIZE_SB8X8 &&
2682        (this_mode == I4X4_PRED || this_mode == SPLITMV))
2683      continue;
2684    if (bsize < BLOCK_SIZE_SB8X8 &&
2685        !(this_mode == I4X4_PRED || this_mode == SPLITMV))
2686      continue;
2687
2688    if (comp_pred) {
2689      if (!(cpi->ref_frame_flags & flag_list[mbmi->ref_frame[1]]))
2690        continue;
2691      set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
2692                        scale_factor);
2693
2694      mode_excluded =
2695          mode_excluded ?
2696              mode_excluded : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
2697    } else {
2698      // mbmi->ref_frame[1] = vp9_mode_order[mode_index].ref_frame[1];
2699      if (ref_frame != INTRA_FRAME) {
2700        if (mbmi->ref_frame[1] != INTRA_FRAME)
2701          mode_excluded =
2702              mode_excluded ?
2703                  mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
2704      }
2705    }
2706
2707    // Select predictors
2708    for (i = 0; i < MAX_MB_PLANE; i++) {
2709      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
2710      if (comp_pred)
2711        xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
2712    }
2713
2714    // If the segment reference frame feature is enabled....
2715    // then do nothing if the current ref frame is not allowed..
2716    if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
2717        vp9_get_segdata(xd, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
2718      continue;
2719    // If the segment skip feature is enabled....
2720    // then do nothing if the current mode is not allowed..
2721    } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) &&
2722               (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) {
2723      continue;
2724    // Disable this drop out case if the ref frame
2725    // segment level feature is enabled for this segment. This is to
2726    // prevent the possibility that we end up unable to pick any mode.
2727    } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) {
2728      // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
2729      // unless ARNR filtering is enabled in which case we want
2730      // an unfiltered alternative
2731      if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
2732        if (this_mode != ZEROMV || ref_frame != ALTREF_FRAME) {
2733          continue;
2734        }
2735      }
2736    }
2737    // TODO(JBB): This is to make up for the fact that we don't have sad
2738    // functions that work when the block size reads outside the umv.  We
2739    // should fix this either by making the motion search just work on
2740    // a representative block in the boundary ( first ) and then implement a
2741    // function that does sads when inside the border..
2742    if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) &&
2743        this_mode == NEWMV) {
2744      continue;
2745    }
2746
2747    if (this_mode == I4X4_PRED) {
2748      int rate;
2749
2750      mbmi->txfm_size = TX_4X4;
2751      rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y,
2752                                &distortion_y, INT64_MAX);
2753      rate2 += rate;
2754      rate2 += intra_cost_penalty;
2755      distortion2 += distortion_y;
2756
2757      rate2 += rate_uv_intra[TX_4X4];
2758      rate_uv = rate_uv_intra[TX_4X4];
2759      distortion2 += dist_uv[TX_4X4];
2760      distortion_uv = dist_uv[TX_4X4];
2761      mbmi->uv_mode = mode_uv[TX_4X4];
2762      txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
2763      for (i = 0; i < NB_TXFM_MODES; ++i)
2764        txfm_cache[i] = txfm_cache[ONLY_4X4];
2765    } else if (ref_frame == INTRA_FRAME) {
2766      TX_SIZE uv_tx;
2767      super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
2768                      bsize, txfm_cache);
2769
2770      uv_tx = mbmi->txfm_size;
2771      if (bsize < BLOCK_SIZE_MB16X16 && uv_tx == TX_8X8)
2772        uv_tx = TX_4X4;
2773      if (bsize < BLOCK_SIZE_SB32X32 && uv_tx == TX_16X16)
2774        uv_tx = TX_8X8;
2775      else if (bsize < BLOCK_SIZE_SB64X64 && uv_tx == TX_32X32)
2776        uv_tx = TX_16X16;
2777
2778      rate_uv = rate_uv_intra[uv_tx];
2779      distortion_uv = dist_uv[uv_tx];
2780      skippable = skippable && skip_uv[uv_tx];
2781      mbmi->uv_mode = mode_uv[uv_tx];
2782
2783      rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv;
2784      if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
2785        rate2 += intra_cost_penalty;
2786      distortion2 = distortion_y + distortion_uv;
2787    } else if (this_mode == SPLITMV) {
2788      const int is_comp_pred = mbmi->ref_frame[1] > 0;
2789      int rate, distortion;
2790      int64_t this_rd_thresh;
2791      int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
2792      int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
2793      int tmp_best_distortion = INT_MAX, tmp_best_skippable = 0;
2794      int switchable_filter_index;
2795      int_mv *second_ref = is_comp_pred ?
2796          &mbmi->ref_mvs[mbmi->ref_frame[1]][0] : NULL;
2797      union b_mode_info tmp_best_bmodes[16];
2798      MB_MODE_INFO tmp_best_mbmode;
2799      PARTITION_INFO tmp_best_partition;
2800      int pred_exists = 0;
2801      int uv_skippable;
2802
2803      this_rd_thresh = (mbmi->ref_frame[0] == LAST_FRAME) ?
2804          cpi->rd_threshes[bsize][THR_NEWMV] :
2805          cpi->rd_threshes[bsize][THR_NEWA];
2806      this_rd_thresh = (mbmi->ref_frame[0] == GOLDEN_FRAME) ?
2807          cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh;
2808      xd->mode_info_context->mbmi.txfm_size = TX_4X4;
2809
2810      for (switchable_filter_index = 0;
2811           switchable_filter_index < VP9_SWITCHABLE_FILTERS;
2812           ++switchable_filter_index) {
2813        int newbest;
2814        mbmi->interp_filter =
2815        vp9_switchable_interp[switchable_filter_index];
2816        vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
2817
2818        tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
2819                     &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
2820                     second_ref, INT64_MAX,
2821                     &rate, &rate_y, &distortion,
2822                     &skippable,
2823                     (int)this_rd_thresh, seg_mvs,
2824                     mi_row, mi_col);
2825        if (cpi->common.mcomp_filter_type == SWITCHABLE) {
2826          const int rs = get_switchable_rate(cm, x);
2827          tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0);
2828        }
2829        newbest = (tmp_rd < tmp_best_rd);
2830        if (newbest) {
2831          tmp_best_filter = mbmi->interp_filter;
2832          tmp_best_rd = tmp_rd;
2833        }
2834        if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
2835            (mbmi->interp_filter == cm->mcomp_filter_type &&
2836             cm->mcomp_filter_type != SWITCHABLE)) {
2837              tmp_best_rdu = tmp_rd;
2838              tmp_best_rate = rate;
2839              tmp_best_ratey = rate_y;
2840              tmp_best_distortion = distortion;
2841              tmp_best_skippable = skippable;
2842              tmp_best_mbmode = *mbmi;
2843              tmp_best_partition = *x->partition_info;
2844              for (i = 0; i < 4; i++)
2845                tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
2846              pred_exists = 1;
2847            }
2848      }  // switchable_filter_index loop
2849
2850      mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
2851                             tmp_best_filter : cm->mcomp_filter_type);
2852      vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
2853      if (!pred_exists) {
2854        // Handles the special case when a filter that is not in the
2855        // switchable list (bilinear, 6-tap) is indicated at the frame level
2856        tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
2857                     &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
2858                     second_ref, INT64_MAX,
2859                     &rate, &rate_y, &distortion,
2860                     &skippable,
2861                     (int)this_rd_thresh, seg_mvs,
2862                     mi_row, mi_col);
2863      } else {
2864        if (cpi->common.mcomp_filter_type == SWITCHABLE) {
2865          int rs = get_switchable_rate(cm, x);
2866          tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
2867        }
2868        tmp_rd = tmp_best_rdu;
2869        rate = tmp_best_rate;
2870        rate_y = tmp_best_ratey;
2871        distortion = tmp_best_distortion;
2872        skippable = tmp_best_skippable;
2873        *mbmi = tmp_best_mbmode;
2874        *x->partition_info = tmp_best_partition;
2875        for (i = 0; i < 4; i++)
2876          xd->mode_info_context->bmi[i] = tmp_best_bmodes[i];
2877      }
2878
2879      rate2 += rate;
2880      distortion2 += distortion;
2881
2882      if (cpi->common.mcomp_filter_type == SWITCHABLE)
2883        rate2 += get_switchable_rate(cm, x);
2884
2885      // If even the 'Y' rd value of split is higher than best so far
2886      // then dont bother looking at UV
2887      vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
2888                                      BLOCK_SIZE_SB8X8);
2889      vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
2890      super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
2891                                &uv_skippable, BLOCK_SIZE_SB8X8, TX_4X4);
2892      rate2 += rate_uv;
2893      distortion2 += distortion_uv;
2894      skippable = skippable && uv_skippable;
2895
2896      txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
2897      for (i = 0; i < NB_TXFM_MODES; ++i)
2898        txfm_cache[i] = txfm_cache[ONLY_4X4];
2899
2900      if (!mode_excluded) {
2901        if (is_comp_pred)
2902          mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
2903        else
2904          mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
2905      }
2906
2907      compmode_cost = vp9_cost_bit(comp_mode_p, is_comp_pred);
2908    } else {
2909      compmode_cost = vp9_cost_bit(comp_mode_p,
2910                                   mbmi->ref_frame[1] > INTRA_FRAME);
2911      this_rd = handle_inter_mode(cpi, x, bsize,
2912                                  txfm_cache,
2913                                  &rate2, &distortion2, &skippable,
2914                                  &rate_y, &distortion_y,
2915                                  &rate_uv, &distortion_uv,
2916                                  &mode_excluded, &disable_skip,
2917                                  &tmp_best_filter, frame_mv[this_mode],
2918                                  mi_row, mi_col,
2919                                  single_newmv);
2920      if (this_rd == INT64_MAX)
2921        continue;
2922    }
2923
2924    if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
2925      rate2 += compmode_cost;
2926    }
2927
2928    // Estimate the reference frame signaling cost and add it
2929    // to the rolling cost variable.
2930    if (mbmi->ref_frame[1] > INTRA_FRAME) {
2931      rate2 += ref_costs_comp[mbmi->ref_frame[0]];
2932    } else {
2933      rate2 += ref_costs_single[mbmi->ref_frame[0]];
2934    }
2935
2936    if (!disable_skip) {
2937      // Test for the condition where skip block will be activated
2938      // because there are no non zero coefficients and make any
2939      // necessary adjustment for rate. Ignore if skip is coded at
2940      // segment level as the cost wont have been added in.
2941      int mb_skip_allowed;
2942
2943      // Is Mb level skip allowed (i.e. not coded at segment level).
2944      mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
2945
2946      if (skippable && bsize >= BLOCK_SIZE_SB8X8) {
2947        // Back out the coefficient coding costs
2948        rate2 -= (rate_y + rate_uv);
2949        // for best_yrd calculation
2950        rate_uv = 0;
2951
2952        if (mb_skip_allowed) {
2953          int prob_skip_cost;
2954
2955          // Cost the skip mb case
2956          vp9_prob skip_prob =
2957            vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
2958
2959          if (skip_prob) {
2960            prob_skip_cost = vp9_cost_bit(skip_prob, 1);
2961            rate2 += prob_skip_cost;
2962          }
2963        }
2964      } else if (mb_skip_allowed) {
2965        // Add in the cost of the no skip flag.
2966        int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd,
2967                                                        PRED_MBSKIP), 0);
2968        rate2 += prob_skip_cost;
2969      }
2970
2971      // Calculate the final RD estimate for this mode.
2972      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
2973    }
2974
2975#if 0
2976    // Keep record of best intra distortion
2977    if ((xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) &&
2978        (this_rd < best_intra_rd)) {
2979      best_intra_rd = this_rd;
2980      *returnintra = distortion2;
2981    }
2982#endif
2983
2984    if (!disable_skip && mbmi->ref_frame[0] == INTRA_FRAME)
2985      for (i = 0; i < NB_PREDICTION_TYPES; ++i)
2986        best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
2987
2988    if (this_rd < best_overall_rd) {
2989      best_overall_rd = this_rd;
2990      best_filter = tmp_best_filter;
2991      best_mode = this_mode;
2992    }
2993
2994    if (this_mode != I4X4_PRED && this_mode != SPLITMV) {
2995      // Store the respective mode distortions for later use.
2996      if (mode_distortions[this_mode] == -1
2997          || distortion2 < mode_distortions[this_mode]) {
2998        mode_distortions[this_mode] = distortion2;
2999      }
3000      if (frame_distortions[mbmi->ref_frame[0]] == -1
3001          || distortion2 < frame_distortions[mbmi->ref_frame[0]]) {
3002        frame_distortions[mbmi->ref_frame[0]] = distortion2;
3003      }
3004    }
3005
3006    // Did this mode help.. i.e. is it the new best mode
3007    if (this_rd < best_rd || x->skip) {
3008      if (!mode_excluded) {
3009        // Note index of best mode so far
3010        best_mode_index = mode_index;
3011
3012        if (ref_frame == INTRA_FRAME) {
3013          /* required for left and above block mv */
3014          mbmi->mv[0].as_int = 0;
3015        }
3016
3017        *returnrate = rate2;
3018        *returndistortion = distortion2;
3019        best_rd = this_rd;
3020        best_mbmode = *mbmi;
3021        best_partition = *x->partition_info;
3022
3023        if (this_mode == I4X4_PRED || this_mode == SPLITMV)
3024          for (i = 0; i < 4; i++)
3025            best_bmodes[i] = xd->mode_info_context->bmi[i];
3026      }
3027#if 0
3028      // Testing this mode gave rise to an improvement in best error score.
3029      // Lower threshold a bit for next time
3030      cpi->rd_thresh_mult[mode_index] =
3031          (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
3032              cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
3033      cpi->rd_threshes[mode_index] =
3034          (cpi->rd_baseline_thresh[mode_index] >> 7)
3035              * cpi->rd_thresh_mult[mode_index];
3036#endif
3037    } else {
3038      // If the mode did not help improve the best error case then
3039      // raise the threshold for testing that mode next time around.
3040#if 0
3041      cpi->rd_thresh_mult[mode_index] += 4;
3042
3043      if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
3044        cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
3045
3046      cpi->rd_threshes[mode_index] =
3047          (cpi->rd_baseline_thresh[mode_index] >> 7)
3048              * cpi->rd_thresh_mult[mode_index];
3049#endif
3050    }
3051
3052    /* keep record of best compound/single-only prediction */
3053    if (!disable_skip && mbmi->ref_frame[0] != INTRA_FRAME) {
3054      int single_rd, hybrid_rd, single_rate, hybrid_rate;
3055
3056      if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
3057        single_rate = rate2 - compmode_cost;
3058        hybrid_rate = rate2;
3059      } else {
3060        single_rate = rate2;
3061        hybrid_rate = rate2 + compmode_cost;
3062      }
3063
3064      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3065      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3066
3067      if (mbmi->ref_frame[1] <= INTRA_FRAME &&
3068          single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
3069        best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
3070      } else if (mbmi->ref_frame[1] > INTRA_FRAME &&
3071                 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
3072        best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
3073      }
3074      if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
3075        best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
3076    }
3077
3078    /* keep record of best txfm size */
3079    if (bsize < BLOCK_SIZE_SB32X32) {
3080      if (bsize < BLOCK_SIZE_MB16X16) {
3081        if (this_mode == SPLITMV || this_mode == I4X4_PRED)
3082          txfm_cache[ALLOW_8X8] = txfm_cache[ONLY_4X4];
3083        txfm_cache[ALLOW_16X16] = txfm_cache[ALLOW_8X8];
3084      }
3085      txfm_cache[ALLOW_32X32] = txfm_cache[ALLOW_16X16];
3086    }
3087    if (!mode_excluded && this_rd != INT64_MAX) {
3088      for (i = 0; i < NB_TXFM_MODES; i++) {
3089        int64_t adj_rd = INT64_MAX;
3090        if (this_mode != I4X4_PRED) {
3091          adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode];
3092        } else {
3093          adj_rd = this_rd;
3094        }
3095
3096        if (adj_rd < best_txfm_rd[i])
3097          best_txfm_rd[i] = adj_rd;
3098      }
3099    }
3100
3101    if (x->skip && !mode_excluded)
3102      break;
3103  }
3104  // Flag all modes that have a distortion thats > 2x the best we found at
3105  // this level.
3106  for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
3107    if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
3108      continue;
3109
3110    if (mode_distortions[mode_index] > 2 * *returndistortion) {
3111      ctx->modes_with_high_error |= (1 << mode_index);
3112    }
3113  }
3114
3115  // Flag all ref frames that have a distortion thats > 2x the best we found at
3116  // this level.
3117  for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3118    if (frame_distortions[ref_frame] > 2 * *returndistortion) {
3119      ctx->frames_with_high_error |= (1 << ref_frame);
3120    }
3121  }
3122
3123  if (best_rd == INT64_MAX && bsize < BLOCK_SIZE_SB8X8) {
3124    *returnrate = INT_MAX;
3125    *returndistortion = INT_MAX;
3126    return best_rd;
3127  }
3128
3129  assert((cm->mcomp_filter_type == SWITCHABLE) ||
3130         (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
3131         (best_mbmode.ref_frame[0] == INTRA_FRAME));
3132
3133  // Accumulate filter usage stats
3134  // TODO(agrange): Use RD criteria to select interpolation filter mode.
3135  if (is_inter_mode(best_mode))
3136    ++cpi->best_switchable_interp_count[vp9_switchable_interp_map[best_filter]];
3137
3138  // Updating rd_thresh_freq_fact[] here means that the differnt
3139  // partition/block sizes are handled independently based on the best
3140  // choice for the current partition. It may well be better to keep a scaled
3141  // best rd so far value and update rd_thresh_freq_fact based on the mode/size
3142  // combination that wins out.
3143  if (cpi->sf.adpative_rd_thresh) {
3144    for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3145      if (mode_index == best_mode_index) {
3146        cpi->rd_thresh_freq_fact[bsize][mode_index] = BASE_RD_THRESH_FREQ_FACT;
3147      } else {
3148        cpi->rd_thresh_freq_fact[bsize][mode_index] += MAX_RD_THRESH_FREQ_INC;
3149        if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
3150            (cpi->sf.adpative_rd_thresh * MAX_RD_THRESH_FREQ_FACT)) {
3151          cpi->rd_thresh_freq_fact[bsize][mode_index] =
3152            cpi->sf.adpative_rd_thresh * MAX_RD_THRESH_FREQ_FACT;
3153        }
3154      }
3155    }
3156  }
3157
3158  // TODO(rbultje) integrate with RD thresholding
3159#if 0
3160  // Reduce the activation RD thresholds for the best choice mode
3161  if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
3162      (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
3163    int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
3164
3165    cpi->rd_thresh_mult[best_mode_index] =
3166      (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ?
3167      cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
3168    cpi->rd_threshes[best_mode_index] =
3169      (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];
3170  }
3171#endif
3172
3173  // This code forces Altref,0,0 and skip for the frame that overlays
3174  // an altref unless Altref is filtered. However, this is unsafe if
3175  // segment level coding of ref frame is enabled for this segment.
3176  if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
3177      cpi->is_src_frame_alt_ref &&
3178      (cpi->oxcf.arnr_max_frames == 0) &&
3179      (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame[0] != ALTREF_FRAME)
3180      && bsize >= BLOCK_SIZE_SB8X8) {
3181    mbmi->mode = ZEROMV;
3182    mbmi->ref_frame[0] = ALTREF_FRAME;
3183    mbmi->ref_frame[1] = NONE;
3184    mbmi->mv[0].as_int = 0;
3185    mbmi->uv_mode = DC_PRED;
3186    mbmi->mb_skip_coeff = 1;
3187    if (cm->txfm_mode == TX_MODE_SELECT) {
3188      if (bsize >= BLOCK_SIZE_SB32X32)
3189        mbmi->txfm_size = TX_32X32;
3190      else if (bsize >= BLOCK_SIZE_MB16X16)
3191        mbmi->txfm_size = TX_16X16;
3192      else
3193        mbmi->txfm_size = TX_8X8;
3194    }
3195
3196    vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
3197    vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
3198    goto end;
3199  }
3200
3201  // macroblock modes
3202  *mbmi = best_mbmode;
3203  if (best_mbmode.ref_frame[0] == INTRA_FRAME &&
3204      best_mbmode.sb_type < BLOCK_SIZE_SB8X8) {
3205    for (i = 0; i < 4; i++)
3206      xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
3207  }
3208
3209  if (best_mbmode.ref_frame[0] != INTRA_FRAME &&
3210      best_mbmode.sb_type < BLOCK_SIZE_SB8X8) {
3211    for (i = 0; i < 4; i++)
3212      xd->mode_info_context->bmi[i].as_mv[0].as_int =
3213          best_bmodes[i].as_mv[0].as_int;
3214
3215    if (mbmi->ref_frame[1] > 0)
3216      for (i = 0; i < 4; i++)
3217        xd->mode_info_context->bmi[i].as_mv[1].as_int =
3218            best_bmodes[i].as_mv[1].as_int;
3219
3220    *x->partition_info = best_partition;
3221
3222    mbmi->mv[0].as_int = x->partition_info->bmi[3].mv.as_int;
3223    mbmi->mv[1].as_int = x->partition_info->bmi[3].second_mv.as_int;
3224  }
3225
3226  for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
3227    if (best_pred_rd[i] == INT64_MAX)
3228      best_pred_diff[i] = INT_MIN;
3229    else
3230      best_pred_diff[i] = best_rd - best_pred_rd[i];
3231  }
3232
3233  if (!x->skip) {
3234    for (i = 0; i < NB_TXFM_MODES; i++) {
3235      if (best_txfm_rd[i] == INT64_MAX)
3236        best_txfm_diff[i] = 0;
3237      else
3238        best_txfm_diff[i] = best_rd - best_txfm_rd[i];
3239    }
3240  } else {
3241    vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
3242  }
3243
3244 end:
3245  set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
3246                    scale_factor);
3247  store_coding_context(x, ctx, best_mode_index,
3248                       &best_partition,
3249                       &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
3250                       &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
3251                                      mbmi->ref_frame[1]][0],
3252                       best_pred_diff, best_txfm_diff);
3253
3254  return best_rd;
3255}
3256