1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12#include <stdio.h>
13#include <math.h>
14#include <limits.h>
15#include <assert.h>
16#include "vp8/common/pragmas.h"
17
18#include "tokenize.h"
19#include "treewriter.h"
20#include "onyx_int.h"
21#include "modecosts.h"
22#include "encodeintra.h"
23#include "vp8/common/entropymode.h"
24#include "vp8/common/reconinter.h"
25#include "vp8/common/reconintra.h"
26#include "vp8/common/reconintra4x4.h"
27#include "vp8/common/findnearmv.h"
28#include "encodemb.h"
29#include "quantize.h"
30#include "vp8/common/idct.h"
31#include "vp8/common/g_common.h"
32#include "variance.h"
33#include "mcomp.h"
34
35#include "vpx_mem/vpx_mem.h"
36#include "dct.h"
37#include "vp8/common/systemdependent.h"
38
39#if CONFIG_RUNTIME_CPU_DETECT
40#define IF_RTCD(x)  (x)
41#else
42#define IF_RTCD(x)  NULL
43#endif
44
45
46extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
47extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
48
49
50#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
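
/* Worked example of RDCOST (illustrative values only): with rate multiplier
 * RM = 300, distortion multiplier DM = 1, rate R = 100 and distortion D = 500,
 * RDCOST(300, 1, 100, 500) = ((128 + 100 * 300) >> 8) + 1 * 500 = 117 + 500 = 617.
 * The +128 and >> 8 round away the fixed point scale of the rate multiplier
 * before the distortion term is added.
 */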
51
52#define MAXF(a,b)            (((a) > (b)) ? (a) : (b))
53
54
55
56static const int auto_speed_thresh[17] =
57{
58    1000,
59    200,
60    150,
61    130,
62    150,
63    125,
64    120,
65    115,
66    115,
67    115,
68    115,
69    115,
70    115,
71    115,
72    115,
73    115,
74    105
75};
76
77const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] =
78{
79    ZEROMV,
80    DC_PRED,
81
82    NEARESTMV,
83    NEARMV,
84
85    ZEROMV,
86    NEARESTMV,
87
88    ZEROMV,
89    NEARESTMV,
90
91    NEARMV,
92    NEARMV,
93
94    V_PRED,
95    H_PRED,
96    TM_PRED,
97
98    NEWMV,
99    NEWMV,
100    NEWMV,
101
102    SPLITMV,
103    SPLITMV,
104    SPLITMV,
105
106    B_PRED,
107};
108
109const MV_REFERENCE_FRAME vp8_ref_frame_order[MAX_MODES] =
110{
111    LAST_FRAME,
112    INTRA_FRAME,
113
114    LAST_FRAME,
115    LAST_FRAME,
116
117    GOLDEN_FRAME,
118    GOLDEN_FRAME,
119
120    ALTREF_FRAME,
121    ALTREF_FRAME,
122
123    GOLDEN_FRAME,
124    ALTREF_FRAME,
125
126    INTRA_FRAME,
127    INTRA_FRAME,
128    INTRA_FRAME,
129
130    LAST_FRAME,
131    GOLDEN_FRAME,
132    ALTREF_FRAME,
133
134    LAST_FRAME,
135    GOLDEN_FRAME,
136    ALTREF_FRAME,
137
138    INTRA_FRAME,
139};
140
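/* Pre-compute, for every (block type, coefficient band, context) triple, the
 * cost of each coefficient token from the current frame's coefficient
 * probabilities, in the same fixed point bit units used throughout this file. */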
141static void fill_token_costs(
142    unsigned int c      [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens],
143    const vp8_prob p    [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1]
144)
145{
146    int i, j, k;
147
148
149    for (i = 0; i < BLOCK_TYPES; i++)
150        for (j = 0; j < COEF_BANDS; j++)
151            for (k = 0; k < PREV_COEF_CONTEXTS; k++)
152
153                vp8_cost_tokens((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree);
154
155}
156
157static int rd_iifactor [ 32 ] =  {    4,   4,   3,   2,   1,   0,   0,   0,
158                                      0,   0,   0,   0,   0,   0,   0,   0,
159                                      0,   0,   0,   0,   0,   0,   0,   0,
160                                      0,   0,   0,   0,   0,   0,   0,   0,
161                                 };
162
163
164/* values are now correlated to quantizer */
165static int sad_per_bit16lut[QINDEX_RANGE] =
166{
167    5,  5,  5,  5,  5,  5,  6,  6,
168    6,  6,  6,  6,  6,  7,  7,  7,
169    7,  7,  7,  7,  8,  8,  8,  8,
170    8,  8,  8,  8,  8,  8,  9,  9,
171    9,  9,  9,  9, 10, 10, 10, 10,
172    10, 10, 11, 11, 11, 11, 11, 11,
173    12, 12, 12, 12, 12, 12, 12, 13,
174    13, 13, 13, 13, 13, 14, 14, 14,
175    14, 14, 15, 15, 15, 15, 15, 15,
176    16, 16, 16, 16, 16, 16, 17, 17,
177    17, 17, 17, 17, 17, 18, 18, 18,
178    18, 18, 19, 19, 19, 19, 19, 19,
179    20, 20, 20, 21, 21, 21, 21, 22,
180    22, 22, 23, 23, 23, 24, 24, 24,
181    25, 25, 26, 26, 27, 27, 27, 28,
182    28, 28, 29, 29, 30, 30, 31, 31
183};
184static int sad_per_bit4lut[QINDEX_RANGE] =
185{
186    5,  5,  5,  5,  5,  5,  7,  7,
187    7,  7,  7,  7,  7,  8,  8,  8,
188    8,  8,  8,  8,  10, 10, 10, 10,
189    10, 10, 10, 10, 10, 10, 11, 11,
190    11, 11, 11, 11, 13, 13, 13, 13,
191    13, 13, 14, 14, 14, 14, 14, 14,
192    16, 16, 16, 16, 16, 16, 16, 17,
193    17, 17, 17, 17, 17, 19, 19, 19,
194    19, 19, 20, 20, 20, 20, 20, 20,
195    22, 22, 22, 22, 22, 22, 23, 23,
196    23, 23, 23, 23, 23, 25, 25, 25,
197    25, 25, 26, 26, 26, 26, 26, 26,
198    28, 28, 28, 29, 29, 29, 29, 31,
199    31, 31, 32, 32, 32, 34, 34, 34,
200    35, 35, 37, 37, 38, 38, 38, 40,
201    40, 40, 41, 41, 43, 43, 44, 44,
202};
203
204void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex)
205{
206    cpi->mb.sadperbit16 =  sad_per_bit16lut[QIndex];
207    cpi->mb.sadperbit4  =  sad_per_bit4lut[QIndex];
208}
209
210void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
211{
212    int q;
213    int i;
214    double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
215    double rdconst = 3.00;
216
217    vp8_clear_system_state();  //__asm emms;
218
219    // Further tests required to see if optimum is different
220    // for key frames, golden frames and arf frames.
221    // if (cpi->common.refresh_golden_frame ||
222    //     cpi->common.refresh_alt_ref_frame)
223    cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
224
225    // Extend rate multiplier along side quantizer zbin increases
226    if (cpi->zbin_over_quant  > 0)
227    {
228        double oq_factor;
229        double modq;
230
231        // Experimental code using the same basic equation as used for Q above
232        // The units of cpi->zbin_over_quant are 1/128 of Q bin size
233        oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
234        modq = (int)((double)capped_q * oq_factor);
235        cpi->RDMULT = (int)(rdconst * (modq * modq));
236    }
237
238    if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME))
239    {
240        if (cpi->next_iiratio > 31)
241            cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
242        else
243            cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->next_iiratio]) >> 4;
244    }
245
246    cpi->mb.errorperbit = (cpi->RDMULT / 100);
247    cpi->mb.errorperbit += (cpi->mb.errorperbit==0);
248
249    vp8_set_speed_features(cpi);
250
251    if (cpi->common.simpler_lpf)
252        cpi->common.filter_type = SIMPLE_LOOPFILTER;
253
254    q = (int)pow(Qvalue, 1.25);
255
256    if (q < 8)
257        q = 8;
258
259    if (cpi->RDMULT > 1000)
260    {
261        cpi->RDDIV = 1;
262        cpi->RDMULT /= 100;
263
264        for (i = 0; i < MAX_MODES; i++)
265        {
266            if (cpi->sf.thresh_mult[i] < INT_MAX)
267            {
268                cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
269            }
270            else
271            {
272                cpi->rd_threshes[i] = INT_MAX;
273            }
274
275            cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
276        }
277    }
278    else
279    {
280        cpi->RDDIV = 100;
281
282        for (i = 0; i < MAX_MODES; i++)
283        {
284            if (cpi->sf.thresh_mult[i] < (INT_MAX / q))
285            {
286                cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
287            }
288            else
289            {
290                cpi->rd_threshes[i] = INT_MAX;
291            }
292
293            cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
294        }
295    }
296
297    fill_token_costs(
298        cpi->mb.token_costs,
299        (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs
300    );
301
302    vp8_init_mode_costs(cpi);
303
304}
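
#if 0
/* Standalone sketch (not compiled) of the baseline RDMULT curve implemented
 * above, kept as documentation only; it ignores the zbin_over_quant and
 * two-pass adjustments that follow in vp8_initialize_rd_consts(). */
static int example_baseline_rdmult(int Qvalue)
{
    double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;

    return (int)(3.00 * capped_q * capped_q);
}
#endif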
305
306void vp8_auto_select_speed(VP8_COMP *cpi)
307{
308    int used = cpi->oxcf.cpu_used;
309
310    int milliseconds_for_compress = (int)(1000000 / cpi->oxcf.frame_rate);
311
312    milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
313
314#if 0
315
316    if (0)
317    {
318        FILE *f;
319
320        f = fopen("speed.stt", "a");
321        fprintf(f, " %8ld %10ld %10ld %10ld\n",
322                cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time);
323        fclose(f);
324    }
325
326#endif
327
328    /*
329    // this is done during parameter valid check
330    if( used > 16)
331        used = 16;
332    if( used < -16)
333        used = -16;
334    */
335
336    if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress)
337    {
338        if (cpi->avg_pick_mode_time == 0)
339        {
340            cpi->Speed = 4;
341        }
342        else
343        {
344            if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95)
345            {
346                cpi->Speed          += 2;
347                cpi->avg_pick_mode_time = 0;
348                cpi->avg_encode_time = 0;
349
350                if (cpi->Speed > 16)
351                {
352                    cpi->Speed = 16;
353                }
354            }
355
356            if (milliseconds_for_compress * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed])
357            {
358                cpi->Speed          -= 1;
359                cpi->avg_pick_mode_time = 0;
360                cpi->avg_encode_time = 0;
361
362                // In real-time mode, cpi->speed is in [4, 16].
363                if (cpi->Speed < 4)        //if ( cpi->Speed < 0 )
364                {
365                    cpi->Speed = 4;        //cpi->Speed = 0;
366                }
367            }
368        }
369    }
370    else
371    {
372        cpi->Speed += 4;
373
374        if (cpi->Speed > 16)
375            cpi->Speed = 16;
376
377
378        cpi->avg_pick_mode_time = 0;
379        cpi->avg_encode_time = 0;
380    }
381}
382
383int vp8_block_error_c(short *coeff, short *dqcoeff)
384{
385    int i;
386    int error = 0;
387
388    for (i = 0; i < 16; i++)
389    {
390        int this_diff = coeff[i] - dqcoeff[i];
391        error += this_diff * this_diff;
392    }
393
394    return error;
395}
396
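/* Sum of squared coefficient errors over the 16 luma blocks.  When dc is 1
 * the DC coefficient of each block is skipped, because in that case the DCs
 * are carried by the second order (Y2) block and their error is added
 * separately (see macro_block_yrd below). */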
397int vp8_mbblock_error_c(MACROBLOCK *mb, int dc)
398{
399    BLOCK  *be;
400    BLOCKD *bd;
401    int i, j;
402    int berror, error = 0;
403
404    for (i = 0; i < 16; i++)
405    {
406        be = &mb->block[i];
407        bd = &mb->e_mbd.block[i];
408
409        berror = 0;
410
411        for (j = dc; j < 16; j++)
412        {
413            int this_diff = be->coeff[j] - bd->dqcoeff[j];
414            berror += this_diff * this_diff;
415        }
416
417        error += berror;
418    }
419
420    return error;
421}
422
423int vp8_mbuverror_c(MACROBLOCK *mb)
424{
425
426    BLOCK  *be;
427    BLOCKD *bd;
428
429
430    int i;
431    int error = 0;
432
433    for (i = 16; i < 24; i++)
434    {
435        be = &mb->block[i];
436        bd = &mb->e_mbd.block[i];
437
438        error += vp8_block_error_c(be->coeff, bd->dqcoeff);
439    }
440
441    return error;
442}
443
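/* Combined sum of squared error for the U and V blocks given the current
 * chroma motion vector.  Chroma MVs have 1/8 pel precision, so the sub-pixel
 * variance is only needed when either component has a fractional part. */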
444int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd)
445{
446    unsigned char *uptr, *vptr;
447    unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
448    unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
449    int uv_stride = x->block[16].src_stride;
450
451    unsigned int sse1 = 0;
452    unsigned int sse2 = 0;
453    int mv_row;
454    int mv_col;
455    int offset;
456    int pre_stride = x->e_mbd.block[16].pre_stride;
457
458    vp8_build_uvmvs(&x->e_mbd, 0);
459    mv_row = x->e_mbd.block[16].bmi.mv.as_mv.row;
460    mv_col = x->e_mbd.block[16].bmi.mv.as_mv.col;
461
462    offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
463    uptr = x->e_mbd.pre.u_buffer + offset;
464    vptr = x->e_mbd.pre.v_buffer + offset;
465
466    if ((mv_row | mv_col) & 7)
467    {
468        VARIANCE_INVOKE(rtcd, subpixvar8x8)(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, uv_stride, &sse2);
469        VARIANCE_INVOKE(rtcd, subpixvar8x8)(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, uv_stride, &sse1);
470        sse2 += sse1;
471    }
472    else
473    {
        VARIANCE_INVOKE(rtcd, var8x8)(uptr, pre_stride, upred_ptr, uv_stride, &sse2);
        VARIANCE_INVOKE(rtcd, var8x8)(vptr, pre_stride, vpred_ptr, uv_stride, &sse1);
476        sse2 += sse1;
477    }
478
479    return sse2;
480
481}
482
483#if !(CONFIG_REALTIME_ONLY)
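/* Rate of one block's quantized coefficients: walk the zig-zag order up to
 * the block's EOB, accumulating token and extra-bit costs, then charge an
 * explicit EOB token if the block is not full.  Coding starts at coefficient
 * 1 for luma blocks whose DC is carried by the Y2 block. */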
484static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
485{
486    int c = !type;              /* start at coef 0, unless Y with Y2 */
487    int eob = b->eob;
488    int pt ;    /* surrounding block/prev coef predictor */
489    int cost = 0;
490    short *qcoeff_ptr = b->qcoeff;
491
492    VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
493
494# define QC( I)  ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
495
496    for (; c < eob; c++)
497    {
498        int v = QC(c);
499        int t = vp8_dct_value_tokens_ptr[v].Token;
500        cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t];
501        cost += vp8_dct_value_cost_ptr[v];
502        pt = vp8_prev_token_class[t];
503    }
504
505# undef QC
506
507    if (c < 16)
508        cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];
509
    pt = (c != !type); // 0 if the block had no coded coefficients (EOB at the start)
511    *a = *l = pt;
512
513    return cost;
514}
515
516static int vp8_rdcost_mby(MACROBLOCK *mb)
517{
518    int cost = 0;
519    int b;
520    MACROBLOCKD *x = &mb->e_mbd;
521    ENTROPY_CONTEXT_PLANES t_above, t_left;
522    ENTROPY_CONTEXT *ta;
523    ENTROPY_CONTEXT *tl;
524
525    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
526    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
527
528    ta = (ENTROPY_CONTEXT *)&t_above;
529    tl = (ENTROPY_CONTEXT *)&t_left;
530
531    for (b = 0; b < 16; b++)
532        cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_NO_DC,
533                    ta + vp8_block2above[b], tl + vp8_block2left[b]);
534
535    cost += cost_coeffs(mb, x->block + 24, PLANE_TYPE_Y2,
536                ta + vp8_block2above[24], tl + vp8_block2left[24]);
537
538    return cost;
539}
540
541static void macro_block_yrd( MACROBLOCK *mb,
542                             int *Rate,
543                             int *Distortion,
544                             const vp8_encodemb_rtcd_vtable_t *rtcd)
545{
546    int b;
547    MACROBLOCKD *const x = &mb->e_mbd;
548    BLOCK   *const mb_y2 = mb->block + 24;
549    BLOCKD *const x_y2  = x->block + 24;
550    short *Y2DCPtr = mb_y2->src_diff;
551    BLOCK *beptr;
552    int d;
553
554    ENCODEMB_INVOKE(rtcd, submby)( mb->src_diff, mb->src.y_buffer,
555                                   mb->e_mbd.predictor, mb->src.y_stride );
556
557    // Fdct and building the 2nd order block
558    for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
559    {
560        mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
561        *Y2DCPtr++ = beptr->coeff[0];
562        *Y2DCPtr++ = beptr->coeff[16];
563    }
564
565    // 2nd order fdct
566    mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
567
568    // Quantization
569    for (b = 0; b < 16; b++)
570    {
571        mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
572    }
573
    // DC prediction and quantization of the 2nd order block
575    mb->quantize_b(mb_y2, x_y2);
576
577    // Distortion
578    d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 1) << 2;
579    d += ENCODEMB_INVOKE(rtcd, berr)(mb_y2->coeff, x_y2->dqcoeff);
580
581    *Distortion = (d >> 4);
582
583    // rate
584    *Rate = vp8_rdcost_mby(mb);
585}
586
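/* The predictor buffers use a stride of 16 bytes.  Reading through an
 * unsigned int pointer, successive rows are 4 uints apart, so the 0/4/8/12
 * indices below copy the left 4x4 corner of the predictor one row at a time. */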
587static void copy_predictor(unsigned char *dst, const unsigned char *predictor)
588{
589    const unsigned int *p = (const unsigned int *)predictor;
590    unsigned int *d = (unsigned int *)dst;
591    d[0] = p[0];
592    d[4] = p[4];
593    d[8] = p[8];
594    d[12] = p[12];
595}
596static int rd_pick_intra4x4block(
597    VP8_COMP *cpi,
598    MACROBLOCK *x,
599    BLOCK *be,
600    BLOCKD *b,
601    B_PREDICTION_MODE *best_mode,
602    unsigned int *bmode_costs,
603    ENTROPY_CONTEXT *a,
604    ENTROPY_CONTEXT *l,
605
606    int *bestrate,
607    int *bestratey,
608    int *bestdistortion)
609{
610    B_PREDICTION_MODE mode;
611    int best_rd = INT_MAX;
612    int rate = 0;
613    int distortion;
614
615    ENTROPY_CONTEXT ta = *a, tempa = *a;
616    ENTROPY_CONTEXT tl = *l, templ = *l;
    /*
     * The predictor buffer is a 2d buffer with a stride of 16.  Create a
     * temp buffer that meets the stride requirement, although only the
     * left 4x4 block is actually used.
     */
622    DECLARE_ALIGNED_ARRAY(16, unsigned char,  best_predictor, 16*4);
623    DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16);
624
625    for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++)
626    {
627        int this_rd;
628        int ratey;
629
630        rate = bmode_costs[mode];
631
632        vp8_predict_intra4x4(b, mode, b->predictor);
633        ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16);
634        x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
635        x->quantize_b(be, b);
636
637        tempa = ta;
638        templ = tl;
639
640        ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
641        rate += ratey;
642        distortion = ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), berr)(be->coeff, b->dqcoeff) >> 2;
643
644        this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
645
646        if (this_rd < best_rd)
647        {
648            *bestrate = rate;
649            *bestratey = ratey;
650            *bestdistortion = distortion;
651            best_rd = this_rd;
652            *best_mode = mode;
653            *a = tempa;
654            *l = templ;
655            copy_predictor(best_predictor, b->predictor);
656            vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
657        }
658    }
659
660    b->bmi.mode = (B_PREDICTION_MODE)(*best_mode);
661
662    IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff, b->diff, 32);
663    RECON_INVOKE(IF_RTCD(&cpi->rtcd.common->recon), recon)(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
664
665    return best_rd;
666}
667
668int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
669                                  int *rate_y, int *Distortion, int best_rd)
670{
671    MACROBLOCKD *const xd = &mb->e_mbd;
672    int i;
673    int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
674    int distortion = 0;
675    int tot_rate_y = 0;
676    long long total_rd = 0;
677    ENTROPY_CONTEXT_PLANES t_above, t_left;
678    ENTROPY_CONTEXT *ta;
679    ENTROPY_CONTEXT *tl;
680    unsigned int *bmode_costs;
681
682    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
683    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
684
685    ta = (ENTROPY_CONTEXT *)&t_above;
686    tl = (ENTROPY_CONTEXT *)&t_left;
687
688    vp8_intra_prediction_down_copy(xd);
689
690    bmode_costs = mb->inter_bmode_costs;
691
692    for (i = 0; i < 16; i++)
693    {
694        MODE_INFO *const mic = xd->mode_info_context;
695        const int mis = xd->mode_info_stride;
696        B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
697        int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
698
699        if (mb->e_mbd.frame_type == KEY_FRAME)
700        {
701            const B_PREDICTION_MODE A = vp8_above_bmi(mic, i, mis)->mode;
702            const B_PREDICTION_MODE L = vp8_left_bmi(mic, i)->mode;
703
704            bmode_costs  = mb->bmode_costs[A][L];
705        }
706
707        total_rd += rd_pick_intra4x4block(
708            cpi, mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
709            ta + vp8_block2above[i],
710            tl + vp8_block2left[i], &r, &ry, &d);
711
712        cost += r;
713        distortion += d;
714        tot_rate_y += ry;
715        mic->bmi[i].mode = xd->block[i].bmi.mode = best_mode;
716
717        if(total_rd >= (long long)best_rd)
718            break;
719    }
720
721    if(total_rd >= (long long)best_rd)
722        return INT_MAX;
723
724    *Rate = cost;
725    *rate_y += tot_rate_y;
726    *Distortion = distortion;
727
728    return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
729}
730int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
731                                   MACROBLOCK *x,
732                                   int *Rate,
733                                   int *rate_y,
734                                   int *Distortion)
735{
736    MB_PREDICTION_MODE mode;
737    MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
738    int rate, ratey;
739    int distortion;
740    int best_rd = INT_MAX;
741    int this_rd;
742
743    //Y Search for 16x16 intra prediction mode
744    for (mode = DC_PRED; mode <= TM_PRED; mode++)
745    {
746        x->e_mbd.mode_info_context->mbmi.mode = mode;
747
748        RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
749            (&x->e_mbd);
750
751        macro_block_yrd(x, &ratey, &distortion, IF_RTCD(&cpi->rtcd.encodemb));
752        rate = ratey + x->mbmode_cost[x->e_mbd.frame_type]
753                                     [x->e_mbd.mode_info_context->mbmi.mode];
754
755        this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
756
757        if (this_rd < best_rd)
758        {
759            mode_selected = mode;
760            best_rd = this_rd;
761            *Rate = rate;
762            *rate_y = ratey;
763            *Distortion = distortion;
764        }
765    }
766
767    x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
768    return best_rd;
769}
770
771static int rd_cost_mbuv(MACROBLOCK *mb)
772{
773    int b;
774    int cost = 0;
775    MACROBLOCKD *x = &mb->e_mbd;
776    ENTROPY_CONTEXT_PLANES t_above, t_left;
777    ENTROPY_CONTEXT *ta;
778    ENTROPY_CONTEXT *tl;
779
780    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
781    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
782
783    ta = (ENTROPY_CONTEXT *)&t_above;
784    tl = (ENTROPY_CONTEXT *)&t_left;
785
786    for (b = 16; b < 24; b++)
787        cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_UV,
788                    ta + vp8_block2above[b], tl + vp8_block2left[b]);
789
790    return cost;
791}
792
793
794static int vp8_rd_inter_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int fullpixel)
795{
796    vp8_build_uvmvs(&x->e_mbd, fullpixel);
797    vp8_encode_inter16x16uvrd(IF_RTCD(&cpi->rtcd), x);
798
799
800    *rate       = rd_cost_mbuv(x);
801    *distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4;
802
803    return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
804}
805
806int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion)
807{
808    MB_PREDICTION_MODE mode;
809    MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
810    int best_rd = INT_MAX;
811    int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
812    int rate_to;
813
814    for (mode = DC_PRED; mode <= TM_PRED; mode++)
815    {
816        int rate;
817        int distortion;
818        int this_rd;
819
820        x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
821        vp8_build_intra_predictors_mbuv(&x->e_mbd);
822        ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), submbuv)(x->src_diff,
823                      x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor,
824                      x->src.uv_stride);
825        vp8_transform_mbuv(x);
826        vp8_quantize_mbuv(x);
827
828        rate_to = rd_cost_mbuv(x);
829        rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.uv_mode];
830
831        distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4;
832
833        this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
834
835        if (this_rd < best_rd)
836        {
837            best_rd = this_rd;
838            d = distortion;
839            r = rate;
840            *rate_tokenonly = rate_to;
841            mode_selected = mode;
842        }
843    }
844
845    *rate = r;
846    *distortion = d;
847
848    x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
849    return best_rd;
850}
851#endif
852
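/* Cost of signalling inter prediction mode m, given the near MV reference
 * counts (the mdcounts filled in by vp8_find_near_mvs). */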
853int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
854{
855    vp8_prob p [VP8_MVREFS-1];
856    assert(NEARESTMV <= m  &&  m <= SPLITMV);
857    vp8_mv_ref_probs(p, near_mv_ref_ct);
858    return vp8_cost_token(vp8_mv_ref_tree, p,
859                          vp8_mv_ref_encoding_array - NEARESTMV + m);
860}
861
862void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv)
863{
864    int i;
865
866    x->e_mbd.mode_info_context->mbmi.mode = mb;
867    x->e_mbd.mode_info_context->mbmi.mv.as_mv.row = mv->row;
868    x->e_mbd.mode_info_context->mbmi.mv.as_mv.col = mv->col;
869
870    for (i = 0; i < 16; i++)
871    {
872        B_MODE_INFO *bmi = &x->e_mbd.block[i].bmi;
873        bmi->mode = (B_PREDICTION_MODE) mb;
874        bmi->mv.as_mv.row = mv->row;
875        bmi->mv.as_mv.col = mv->col;
876    }
877}
878
879#if !(CONFIG_REALTIME_ONLY)
880static int labels2mode(
881    MACROBLOCK *x,
882    int const *labelings, int which_label,
883    B_PREDICTION_MODE this_mode,
884    MV *this_mv, MV *best_ref_mv,
885    int *mvcost[2]
886)
887{
888    MACROBLOCKD *const xd = & x->e_mbd;
889    MODE_INFO *const mic = xd->mode_info_context;
890    const int mis = xd->mode_info_stride;
891
892    int cost = 0;
893    int thismvcost = 0;
894
895    /* We have to be careful retrieving previously-encoded motion vectors.
896       Ones from this macroblock have to be pulled from the BLOCKD array
897       as they have not yet made it to the bmi array in our MB_MODE_INFO. */
898
899    int i = 0;
900
901    do
902    {
903        BLOCKD *const d = xd->block + i;
904        const int row = i >> 2,  col = i & 3;
905
906        B_PREDICTION_MODE m;
907
908        if (labelings[i] != which_label)
909            continue;
910
911        if (col  &&  labelings[i] == labelings[i-1])
912            m = LEFT4X4;
913        else if (row  &&  labelings[i] == labelings[i-4])
914            m = ABOVE4X4;
915        else
916        {
917            // the only time we should do costing for new motion vector or mode
918            // is when we are on a new label  (jbb May 08, 2007)
919            switch (m = this_mode)
920            {
921            case NEW4X4 :
922                thismvcost  = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
923                break;
924            case LEFT4X4:
925                *this_mv = col ? d[-1].bmi.mv.as_mv : vp8_left_bmi(mic, i)->mv.as_mv;
926                break;
927            case ABOVE4X4:
928                *this_mv = row ? d[-4].bmi.mv.as_mv : vp8_above_bmi(mic, i, mis)->mv.as_mv;
929                break;
930            case ZERO4X4:
931                this_mv->row = this_mv->col = 0;
932                break;
933            default:
934                break;
935            }
936
937            if (m == ABOVE4X4)  // replace above with left if same
938            {
939                const MV mv = col ? d[-1].bmi.mv.as_mv : vp8_left_bmi(mic, i)->mv.as_mv;
940
941                if (mv.row == this_mv->row  &&  mv.col == this_mv->col)
942                    m = LEFT4X4;
943            }
944
945            cost = x->inter_bmode_costs[ m];
946        }
947
948        d->bmi.mode = m;
949        d->bmi.mv.as_mv = *this_mv;
950
951    }
952    while (++i < 16);
953
954    cost += thismvcost ;
955    return cost;
956}
957
958static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
959                              int which_label, ENTROPY_CONTEXT *ta,
960                              ENTROPY_CONTEXT *tl)
961{
962    int cost = 0;
963    int b;
964    MACROBLOCKD *x = &mb->e_mbd;
965
966    for (b = 0; b < 16; b++)
967        if (labels[ b] == which_label)
968            cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_WITH_DC,
969                                ta + vp8_block2above[b],
970                                tl + vp8_block2left[b]);
971
972    return cost;
973
974}
975static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels, int which_label, const vp8_encodemb_rtcd_vtable_t *rtcd)
976{
977    int i;
978    unsigned int distortion = 0;
979
980    for (i = 0; i < 16; i++)
981    {
982        if (labels[i] == which_label)
983        {
984            BLOCKD *bd = &x->e_mbd.block[i];
985            BLOCK *be = &x->block[i];
986
987
988            vp8_build_inter_predictors_b(bd, 16, x->e_mbd.subpixel_predict);
989            ENCODEMB_INVOKE(rtcd, subb)(be, bd, 16);
990            x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
991
            // Setting the DC to 0 (to discount it, since there is no way to
            // account for a 2nd order DC here) is currently disabled:
            //be->coeff[0] = 0;
994            x->quantize_b(be, bd);
995
996            distortion += ENCODEMB_INVOKE(rtcd, berr)(be->coeff, bd->dqcoeff);
997        }
998    }
999
1000    return distortion;
1001}
1002
1003
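/* Per-partition shift applied to the best SAD before it is compared against
 * the full-search threshold below; it roughly rescales the SAD of a single
 * label (16x8/8x16, 8x8 or 4x4) so that the same threshold can be used for
 * every partition size. */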
1004static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0};
1005
1006
1007typedef struct
1008{
1009  MV *ref_mv;
1010  MV *mvp;
1011
1012  int segment_rd;
1013  int segment_num;
1014  int r;
1015  int d;
1016  int segment_yrate;
1017  B_PREDICTION_MODE modes[16];
1018  int_mv mvs[16];
1019  unsigned char eobs[16];
1020
1021  int mvthresh;
1022  int *mdcounts;
1023
1024  MV sv_mvp[4];     // save 4 mvp from 8x8
1025  int sv_istep[2];  // save 2 initial step_param for 16x8/8x16
1026
1027} BEST_SEG_INFO;
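
/* Note: sv_mvp[] is filled from the 8x8 result in raster order (blocks 0, 2,
 * 8 and 10 of the macroblock) and is reused to seed the 16x8, 8x16 and 4x4
 * searches in vp8_rd_pick_best_mbsegmentation() below. */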
1028
1029
1030static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
1031                             BEST_SEG_INFO *bsi, unsigned int segmentation)
1032{
1033    int i;
1034    int const *labels;
1035    int br = 0;
1036    int bd = 0;
1037    B_PREDICTION_MODE this_mode;
1038
1039
1040    int label_count;
1041    int this_segment_rd = 0;
1042    int label_mv_thresh;
1043    int rate = 0;
1044    int sbr = 0;
1045    int sbd = 0;
1046    int segmentyrate = 0;
1047
1048    vp8_variance_fn_ptr_t *v_fn_ptr;
1049
1050    ENTROPY_CONTEXT_PLANES t_above, t_left;
1051    ENTROPY_CONTEXT *ta;
1052    ENTROPY_CONTEXT *tl;
1053    ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
1054    ENTROPY_CONTEXT *ta_b;
1055    ENTROPY_CONTEXT *tl_b;
1056
1057    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
1058    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
1059
1060    ta = (ENTROPY_CONTEXT *)&t_above;
1061    tl = (ENTROPY_CONTEXT *)&t_left;
1062    ta_b = (ENTROPY_CONTEXT *)&t_above_b;
1063    tl_b = (ENTROPY_CONTEXT *)&t_left_b;
1064
1065    br = 0;
1066    bd = 0;
1067
1068    v_fn_ptr = &cpi->fn_ptr[segmentation];
1069    labels = vp8_mbsplits[segmentation];
1070    label_count = vp8_mbsplit_count[segmentation];
1071
    // This multiplier controls how often per-label MV searches are attempted.
    // With the current value of 1 the per-label threshold is roughly the
    // macroblock threshold divided by the label count; a much larger value
    // (e.g. 64) would make segment MV checks very rare.
    label_mv_thresh = 1 * bsi->mvthresh / label_count ;
1077
1078    // Segmentation method overheads
1079    rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation);
1080    rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
1081    this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
1082    br += rate;
1083
1084    for (i = 0; i < label_count; i++)
1085    {
1086        MV mode_mv[B_MODE_COUNT];
1087        int best_label_rd = INT_MAX;
1088        B_PREDICTION_MODE mode_selected = ZERO4X4;
1089        int bestlabelyrate = 0;
1090
1091        // search for the best motion vector on this segment
1092        for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++)
1093        {
1094            int this_rd;
1095            int distortion;
1096            int labelyrate;
1097            ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
1098            ENTROPY_CONTEXT *ta_s;
1099            ENTROPY_CONTEXT *tl_s;
1100
1101            vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
1102            vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
1103
1104            ta_s = (ENTROPY_CONTEXT *)&t_above_s;
1105            tl_s = (ENTROPY_CONTEXT *)&t_left_s;
1106
1107            if (this_mode == NEW4X4)
1108            {
1109                int sseshift;
1110                int num00;
1111                int step_param = 0;
1112                int further_steps;
1113                int n;
1114                int thissme;
1115                int bestsme = INT_MAX;
1116                MV  temp_mv;
1117                BLOCK *c;
1118                BLOCKD *e;
1119
                // Is the best so far sufficiently good that we can't justify doing a new motion search.
1121                if (best_label_rd < label_mv_thresh)
1122                    break;
1123
1124                if(cpi->compressor_speed)
1125                {
1126                    if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8)
1127                    {
1128                        bsi->mvp = &bsi->sv_mvp[i];
1129                        if (i==1 && segmentation == BLOCK_16X8) bsi->mvp = &bsi->sv_mvp[2];
1130
1131                        step_param = bsi->sv_istep[i];
1132                    }
1133
1134                    // use previous block's result as next block's MV predictor.
1135                    if (segmentation == BLOCK_4X4 && i>0)
1136                    {
1137                        bsi->mvp = &(x->e_mbd.block[i-1].bmi.mv.as_mv);
1138                        if (i==4 || i==8 || i==12) bsi->mvp = &(x->e_mbd.block[i-4].bmi.mv.as_mv);
1139                        step_param = 2;
1140                    }
1141                }
1142
1143                further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1144
1145                {
1146                    int sadpb = x->sadperbit4;
1147
1148                    // find first label
1149                    n = vp8_mbsplit_offset[segmentation][i];
1150
1151                    c = &x->block[n];
1152                    e = &x->e_mbd.block[n];
1153
1154                    if (cpi->sf.search_method == HEX)
1155                        bestsme = vp8_hex_search(x, c, e, bsi->ref_mv,
1156                                                 &mode_mv[NEW4X4], step_param, sadpb, &num00, v_fn_ptr, x->mvsadcost, x->mvcost, bsi->ref_mv);
1157
1158                    else
1159                    {
1160                        bestsme = cpi->diamond_search_sad(x, c, e, bsi->mvp,
1161                                                          &mode_mv[NEW4X4], step_param,
1162                                                          sadpb / 2, &num00, v_fn_ptr, x->mvsadcost, x->mvcost, bsi->ref_mv);
1163
1164                        n = num00;
1165                        num00 = 0;
1166
1167                        while (n < further_steps)
1168                        {
1169                            n++;
1170
1171                            if (num00)
1172                                num00--;
1173                            else
1174                            {
1175                                thissme = cpi->diamond_search_sad(x, c, e, bsi->mvp,
1176                                                                  &temp_mv, step_param + n,
1177                                                                  sadpb / 2, &num00, v_fn_ptr, x->mvsadcost, x->mvcost, bsi->ref_mv);
1178
1179                                if (thissme < bestsme)
1180                                {
1181                                    bestsme = thissme;
1182                                    mode_mv[NEW4X4].row = temp_mv.row;
1183                                    mode_mv[NEW4X4].col = temp_mv.col;
1184                                }
1185                            }
1186                        }
1187                    }
1188
1189                    sseshift = segmentation_to_sseshift[segmentation];
1190
1191                    // Should we do a full search (best quality only)
1192                    if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000)
1193                    {
1194                        thissme = cpi->full_search_sad(x, c, e, bsi->mvp,
1195                                                       sadpb / 4, 16, v_fn_ptr, x->mvcost, x->mvsadcost,bsi->ref_mv);
1196
1197                        if (thissme < bestsme)
1198                        {
1199                            bestsme = thissme;
1200                            mode_mv[NEW4X4] = e->bmi.mv.as_mv;
1201                        }
1202                        else
1203                        {
1204                            // The full search result is actually worse so re-instate the previous best vector
1205                            e->bmi.mv.as_mv = mode_mv[NEW4X4];
1206                        }
1207                    }
1208                }
1209
1210                if (bestsme < INT_MAX)
1211                {
1212                    if (!cpi->common.full_pixel)
1213                        cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
1214                                                     bsi->ref_mv, x->errorperbit / 2, v_fn_ptr, x->mvcost);
1215                    else
1216                        vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
1217                                                    bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost);
1218                }
1219            } /* NEW4X4 */
1220
1221            rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
1222                               bsi->ref_mv, x->mvcost);
1223
1224            // Trap vectors that reach beyond the UMV borders
1225            if (((mode_mv[this_mode].row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].row >> 3) > x->mv_row_max) ||
1226                ((mode_mv[this_mode].col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].col >> 3) > x->mv_col_max))
1227            {
1228                continue;
1229            }
1230
1231            distortion = vp8_encode_inter_mb_segment(x, labels, i, IF_RTCD(&cpi->rtcd.encodemb)) / 4;
1232
1233            labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
1234            rate += labelyrate;
1235
1236            this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1237
1238            if (this_rd < best_label_rd)
1239            {
1240                sbr = rate;
1241                sbd = distortion;
1242                bestlabelyrate = labelyrate;
1243                mode_selected = this_mode;
1244                best_label_rd = this_rd;
1245
1246                vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
1247                vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
1248
1249            }
1250        } /*for each 4x4 mode*/
1251
1252        vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
1253        vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
1254
1255        labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
1256                    bsi->ref_mv, x->mvcost);
1257
1258        br += sbr;
1259        bd += sbd;
1260        segmentyrate += bestlabelyrate;
1261        this_segment_rd += best_label_rd;
1262
1263        if (this_segment_rd >= bsi->segment_rd)
1264            break;
1265
1266    } /* for each label */
1267
1268    if (this_segment_rd < bsi->segment_rd)
1269    {
1270        bsi->r = br;
1271        bsi->d = bd;
1272        bsi->segment_yrate = segmentyrate;
1273        bsi->segment_rd = this_segment_rd;
1274        bsi->segment_num = segmentation;
1275
1276        // store everything needed to come back to this!!
1277        for (i = 0; i < 16; i++)
1278        {
1279            BLOCKD *bd = &x->e_mbd.block[i];
1280
1281            bsi->mvs[i].as_mv = bd->bmi.mv.as_mv;
1282            bsi->modes[i] = bd->bmi.mode;
1283            bsi->eobs[i] = bd->eob;
1284        }
1285    }
1286}
1287
1288static __inline
1289void vp8_cal_step_param(int sr, int *sp)
1290{
1291    int step = 0;
1292
1293    if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP;
1294    else if (sr < 1) sr = 1;
1295
1296    while (sr>>=1)
1297        step++;
1298
1299    *sp = MAX_MVSEARCH_STEPS - 1 - step;
1300}
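
/* Example: a spread of 4 full pels between the seed MVs gives step = 2 above,
 * so the returned step_param is MAX_MVSEARCH_STEPS - 3; a wider spread yields
 * a smaller step_param and therefore more diamond search steps. */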
1301
1302static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
1303                                           MV *best_ref_mv, int best_rd,
1304                                           int *mdcounts, int *returntotrate,
1305                                           int *returnyrate, int *returndistortion,
1306                                           int mvthresh)
1307{
1308    int i;
1309    BEST_SEG_INFO bsi;
1310
1311    vpx_memset(&bsi, 0, sizeof(bsi));
1312
1313    bsi.segment_rd = best_rd;
1314    bsi.ref_mv = best_ref_mv;
1315    bsi.mvp = best_ref_mv;
1316    bsi.mvthresh = mvthresh;
1317    bsi.mdcounts = mdcounts;
1318
1319    for(i = 0; i < 16; i++)
1320    {
1321        bsi.modes[i] = ZERO4X4;
1322    }
1323
1324    if(cpi->compressor_speed == 0)
1325    {
1326        /* for now, we will keep the original segmentation order
1327           when in best quality mode */
1328        rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1329        rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1330        rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1331        rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1332    }
1333    else
1334    {
1335        int sr;
1336
1337        rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
1338
1339        if (bsi.segment_rd < best_rd)
1340        {
1341            int col_min = (best_ref_mv->col - MAX_FULL_PEL_VAL) >>3;
1342            int col_max = (best_ref_mv->col + MAX_FULL_PEL_VAL) >>3;
1343            int row_min = (best_ref_mv->row - MAX_FULL_PEL_VAL) >>3;
1344            int row_max = (best_ref_mv->row + MAX_FULL_PEL_VAL) >>3;
1345
1346            int tmp_col_min = x->mv_col_min;
1347            int tmp_col_max = x->mv_col_max;
1348            int tmp_row_min = x->mv_row_min;
1349            int tmp_row_max = x->mv_row_max;
1350
1351            /* Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. */
1352            if (x->mv_col_min < col_min )
1353                x->mv_col_min = col_min;
1354            if (x->mv_col_max > col_max )
1355                x->mv_col_max = col_max;
1356            if (x->mv_row_min < row_min )
1357                x->mv_row_min = row_min;
1358            if (x->mv_row_max > row_max )
1359                x->mv_row_max = row_max;
1360
1361            /* Get 8x8 result */
1362            bsi.sv_mvp[0] = bsi.mvs[0].as_mv;
1363            bsi.sv_mvp[1] = bsi.mvs[2].as_mv;
1364            bsi.sv_mvp[2] = bsi.mvs[8].as_mv;
1365            bsi.sv_mvp[3] = bsi.mvs[10].as_mv;
1366
1367            /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range according to the closeness of 2 MV. */
1368            /* block 8X16 */
1369            {
1370                sr = MAXF((abs(bsi.sv_mvp[0].row - bsi.sv_mvp[2].row))>>3, (abs(bsi.sv_mvp[0].col - bsi.sv_mvp[2].col))>>3);
1371                vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1372
1373                sr = MAXF((abs(bsi.sv_mvp[1].row - bsi.sv_mvp[3].row))>>3, (abs(bsi.sv_mvp[1].col - bsi.sv_mvp[3].col))>>3);
1374                vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1375
1376                rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
1377            }
1378
1379            /* block 16X8 */
1380            {
1381                sr = MAXF((abs(bsi.sv_mvp[0].row - bsi.sv_mvp[1].row))>>3, (abs(bsi.sv_mvp[0].col - bsi.sv_mvp[1].col))>>3);
1382                vp8_cal_step_param(sr, &bsi.sv_istep[0]);
1383
1384                sr = MAXF((abs(bsi.sv_mvp[2].row - bsi.sv_mvp[3].row))>>3, (abs(bsi.sv_mvp[2].col - bsi.sv_mvp[3].col))>>3);
1385                vp8_cal_step_param(sr, &bsi.sv_istep[1]);
1386
1387                rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
1388            }
1389
1390            /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
            /* Do not skip the 4x4 search in good quality mode */
1392            if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)  /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
1393            {
1394                bsi.mvp = &bsi.sv_mvp[0];
1395                rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
1396            }
1397
1398            /* restore UMV window */
1399            x->mv_col_min = tmp_col_min;
1400            x->mv_col_max = tmp_col_max;
1401            x->mv_row_min = tmp_row_min;
1402            x->mv_row_max = tmp_row_max;
1403        }
1404    }
1405
1406    /* set it to the best */
1407    for (i = 0; i < 16; i++)
1408    {
1409        BLOCKD *bd = &x->e_mbd.block[i];
1410
1411        bd->bmi.mv.as_mv = bsi.mvs[i].as_mv;
1412        bd->bmi.mode = bsi.modes[i];
1413        bd->eob = bsi.eobs[i];
1414    }
1415
1416    *returntotrate = bsi.r;
1417    *returndistortion = bsi.d;
1418    *returnyrate = bsi.segment_yrate;
1419
1420    /* save partitions */
1421    x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
1422    x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
1423
1424    for (i = 0; i < x->partition_info->count; i++)
1425    {
1426        int j;
1427
1428        j = vp8_mbsplit_offset[bsi.segment_num][i];
1429
1430        x->partition_info->bmi[i].mode = x->e_mbd.block[j].bmi.mode;
1431        x->partition_info->bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;
1432    }
1433
1434    return bsi.segment_rd;
1435}
1436#endif
1437
1438static void swap(int *x,int *y)
1439{
1440   int tmp;
1441
1442   tmp = *x;
1443   *x = *y;
1444   *y = tmp;
1445}
1446
1447static void quicksortmv(int arr[],int left, int right)
1448{
1449   int lidx,ridx,pivot;
1450
1451   lidx = left;
1452   ridx = right;
1453
1454   if( left < right)
1455   {
1456      pivot = (left + right)/2;
1457
1458      while(lidx <=pivot && ridx >=pivot)
1459      {
1460          while(arr[lidx] < arr[pivot] && lidx <= pivot)
1461              lidx++;
1462          while(arr[ridx] > arr[pivot] && ridx >= pivot)
1463              ridx--;
1464          swap(&arr[lidx], &arr[ridx]);
1465          lidx++;
1466          ridx--;
1467          if(lidx-1 == pivot)
1468          {
1469              ridx++;
1470              pivot = ridx;
1471          }
1472          else if(ridx+1 == pivot)
1473          {
1474              lidx--;
1475              pivot = lidx;
1476          }
1477      }
1478      quicksortmv(arr, left, pivot - 1);
1479      quicksortmv(arr, pivot + 1, right);
1480   }
1481}
1482
1483static void quicksortsad(int arr[],int idx[], int left, int right)
1484{
1485   int lidx,ridx,pivot;
1486
1487   lidx = left;
1488   ridx = right;
1489
1490   if( left < right)
1491   {
1492      pivot = (left + right)/2;
1493
1494      while(lidx <=pivot && ridx >=pivot)
1495      {
1496          while(arr[lidx] < arr[pivot] && lidx <= pivot)
1497              lidx++;
1498          while(arr[ridx] > arr[pivot] && ridx >= pivot)
1499              ridx--;
1500          swap(&arr[lidx], &arr[ridx]);
1501          swap(&idx[lidx], &idx[ridx]);
1502          lidx++;
1503          ridx--;
1504          if(lidx-1 == pivot)
1505          {
1506              ridx++;
1507              pivot = ridx;
1508          }
1509          else if(ridx+1 == pivot)
1510          {
1511              lidx--;
1512              pivot = lidx;
1513          }
1514      }
1515      quicksortsad(arr, idx, left, pivot - 1);
1516      quicksortsad(arr, idx, pivot + 1, right);
1517   }
1518}
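
/* quicksortmv() sorts a single MV component array in ascending order so that
 * the median can be taken in vp8_mv_pred(); quicksortsad() additionally
 * co-sorts the index array, leaving near_sadidx[] ordered from the most to
 * the least similar neighbouring SAD. */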
1519
1520//The improved MV prediction
1521void vp8_mv_pred
1522(
1523    VP8_COMP *cpi,
1524    MACROBLOCKD *xd,
1525    const MODE_INFO *here,
1526    MV *mvp,
1527    int refframe,
1528    int *ref_frame_sign_bias,
1529    int *sr,
1530    int near_sadidx[]
1531)
1532{
1533    const MODE_INFO *above = here - xd->mode_info_stride;
1534    const MODE_INFO *left = here - 1;
1535    const MODE_INFO *aboveleft = above - 1;
1536    int_mv           near_mvs[8];
1537    int              near_ref[8];
1538    int_mv           mv;
1539    int              vcnt=0;
1540    int              find=0;
1541    int              mb_offset;
1542
1543    int              mvx[8];
1544    int              mvy[8];
1545    int              i;
1546
1547    mv.as_int = 0;
1548
1549    if(here->mbmi.ref_frame != INTRA_FRAME)
1550    {
1551        near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0;
1552        near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0;
1553
        // read in 3 nearby blocks' MVs from the current frame as prediction candidates.
1555        if (above->mbmi.ref_frame != INTRA_FRAME)
1556        {
1557            near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
1558            mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1559            near_ref[vcnt] =  above->mbmi.ref_frame;
1560        }
1561        vcnt++;
1562        if (left->mbmi.ref_frame != INTRA_FRAME)
1563        {
1564            near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
1565            mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1566            near_ref[vcnt] =  left->mbmi.ref_frame;
1567        }
1568        vcnt++;
1569        if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
1570        {
1571            near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
1572            mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1573            near_ref[vcnt] =  aboveleft->mbmi.ref_frame;
1574        }
1575        vcnt++;
1576
        // read in 5 nearby blocks' MVs from the last frame.
1578        if(cpi->common.last_frame_type != KEY_FRAME)
1579        {
1580            mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ;
1581
1582            // current in last frame
1583            if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME)
1584            {
1585                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
1586                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1587                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset];
1588            }
1589            vcnt++;
1590
1591            // above in last frame
1592            if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME)
1593            {
1594                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int;
1595                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride-1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1596                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1];
1597            }
1598            vcnt++;
1599
1600            // left in last frame
1601            if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME)
1602            {
1603                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int;
1604                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1605                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - 1];
1606            }
1607            vcnt++;
1608
1609            // right in last frame
1610            if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME)
1611            {
1612                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int;
1613                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1614                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset +1];
1615            }
1616            vcnt++;
1617
1618            // below in last frame
1619            if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME)
1620            {
1621                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int;
1622                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
1623                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1];
1624            }
1625            vcnt++;
1626        }
1627
1628        for(i=0; i< vcnt; i++)
1629        {
1630            if(near_ref[near_sadidx[i]] != INTRA_FRAME)
1631            {
1632                if(here->mbmi.ref_frame == near_ref[near_sadidx[i]])
1633                {
1634                    mv.as_int = near_mvs[near_sadidx[i]].as_int;
1635                    find = 1;
1636                    if (i < 3)
1637                        *sr = 3;
1638                    else
1639                        *sr = 2;
1640                    break;
1641                }
1642            }
1643        }
1644
1645        if(!find)
1646        {
1647            for(i=0; i<vcnt; i++)
1648            {
1649                mvx[i] = near_mvs[i].as_mv.row;
1650                mvy[i] = near_mvs[i].as_mv.col;
1651            }
1652
1653            quicksortmv (mvx, 0, vcnt-1);
1654            quicksortmv (mvy, 0, vcnt-1);
1655            mv.as_mv.row = mvx[vcnt/2];
1656            mv.as_mv.col = mvy[vcnt/2];
1657
1658            find = 1;
1659            //sr is set to 0 to allow calling function to decide the search range.
1660            *sr = 0;
1661        }
1662    }
1663
1664    /* Set up return values */
1665    *mvp = mv.as_mv;
1666    vp8_clamp_mv(mvp, xd);
1667}
1668
1669void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[])
1670{
1671
1672    int near_sad[8] = {0}; // 0-cf above, 1-cf left, 2-cf aboveleft, 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
1673
1674    //calculate sad for current frame 3 nearby MBs.
1675    if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
1676    {
1677        near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
1678    }else if(xd->mb_to_top_edge==0)
1679    {   //only has left MB for sad calculation.
1680        near_sad[0] = near_sad[2] = INT_MAX;
1681        near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
1682    }else if(xd->mb_to_left_edge ==0)
    {   // only the above MB is available for sad calculation.
1684        near_sad[1] = near_sad[2] = INT_MAX;
1685        near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
1686    }else
1687    {
1688        near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
1689        near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
1690        near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, 0x7fffffff);
1691    }
1692
1693    if(cpi->common.last_frame_type != KEY_FRAME)
1694    {
1695        //calculate sad for last frame 5 nearby MBs.
1696        unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
1697        int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;
1698
1699        if(xd->mb_to_top_edge==0) near_sad[4] = INT_MAX;
1700        if(xd->mb_to_left_edge ==0) near_sad[5] = INT_MAX;
1701        if(xd->mb_to_right_edge ==0) near_sad[6] = INT_MAX;
1702        if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX;
1703
1704        if(near_sad[4] != INT_MAX)
1705            near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff);
1706        if(near_sad[5] != INT_MAX)
1707            near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff);
1708        near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
1709        if(near_sad[6] != INT_MAX)
1710            near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer + 16, pre_y_stride, 0x7fffffff);
1711        if(near_sad[7] != INT_MAX)
1712            near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, 0x7fffffff);
1713    }
1714
1715    if(cpi->common.last_frame_type != KEY_FRAME)
1716    {
1717        quicksortsad(near_sad, near_sadidx, 0, 7);
1718    }else
1719    {
1720        quicksortsad(near_sad, near_sadidx, 0, 2);
1721    }
1722}
1723
1724#if !(CONFIG_REALTIME_ONLY)
1725int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra)
1726{
1727    BLOCK *b = &x->block[0];
1728    BLOCKD *d = &x->e_mbd.block[0];
1729    MACROBLOCKD *xd = &x->e_mbd;
1730    B_MODE_INFO best_bmodes[16];
1731    MB_MODE_INFO best_mbmode;
1732    PARTITION_INFO best_partition;
1733    MV best_ref_mv;
1734    MV mode_mv[MB_MODE_COUNT];
1735    MB_PREDICTION_MODE this_mode;
1736    int num00;
1737    int best_mode_index = 0;
1738
1739    int i;
1740    int mode_index;
1741    int mdcounts[4];
1742    int rate;
1743    int distortion;
1744    int best_rd = INT_MAX; // 1 << 30;
1745    int ref_frame_cost[MAX_REF_FRAMES];
1746    int rate2, distortion2;
1747    int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
1748    int rate_y, UNINITIALIZED_IS_SAFE(rate_uv);
1749    int distortion_uv;
1750    int best_yrd = INT_MAX;
1751
1752    //int all_rds[MAX_MODES];        // Experimental debug code.
1753    //int all_rates[MAX_MODES];
1754    //int all_dist[MAX_MODES];
1755    //int intermodecost[MAX_MODES];
1756
1757    MB_PREDICTION_MODE uv_intra_mode;
1758
1759    int force_no_skip = 0;
1760
1761    MV mvp;
1762    int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
1763    int saddone=0;
1764    int sr=0;    //search range returned by mv_pred(), in step_param levels (0-7)
1765
1766    MV frame_nearest_mv[4];
1767    MV frame_near_mv[4];
1768    MV frame_best_ref_mv[4];
1769    int frame_mdcounts[4][4];
1770    int frame_lf_or_gf[4];
1771    unsigned char *y_buffer[4];
1772    unsigned char *u_buffer[4];
1773    unsigned char *v_buffer[4];
1774
1775    vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
1776
1777    if (cpi->ref_frame_flags & VP8_LAST_FLAG)
1778    {
1779        YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
1780
1781        vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[LAST_FRAME], &frame_near_mv[LAST_FRAME],
1782                          &frame_best_ref_mv[LAST_FRAME], frame_mdcounts[LAST_FRAME], LAST_FRAME, cpi->common.ref_frame_sign_bias);
1783
1784        y_buffer[LAST_FRAME] = lst_yv12->y_buffer + recon_yoffset;
1785        u_buffer[LAST_FRAME] = lst_yv12->u_buffer + recon_uvoffset;
1786        v_buffer[LAST_FRAME] = lst_yv12->v_buffer + recon_uvoffset;
1787
1788        frame_lf_or_gf[LAST_FRAME] = 0;
1789    }
1790
1791    if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
1792    {
1793        YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx];
1794
1795        vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[GOLDEN_FRAME], &frame_near_mv[GOLDEN_FRAME],
1796                          &frame_best_ref_mv[GOLDEN_FRAME], frame_mdcounts[GOLDEN_FRAME], GOLDEN_FRAME, cpi->common.ref_frame_sign_bias);
1797
1798        y_buffer[GOLDEN_FRAME] = gld_yv12->y_buffer + recon_yoffset;
1799        u_buffer[GOLDEN_FRAME] = gld_yv12->u_buffer + recon_uvoffset;
1800        v_buffer[GOLDEN_FRAME] = gld_yv12->v_buffer + recon_uvoffset;
1801
1802        frame_lf_or_gf[GOLDEN_FRAME] = 1;
1803    }
1804
1805    if (cpi->ref_frame_flags & VP8_ALT_FLAG)
1806    {
1807        YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx];
1808
1809        vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[ALTREF_FRAME], &frame_near_mv[ALTREF_FRAME],
1810                          &frame_best_ref_mv[ALTREF_FRAME], frame_mdcounts[ALTREF_FRAME], ALTREF_FRAME, cpi->common.ref_frame_sign_bias);
1811
1812        y_buffer[ALTREF_FRAME] = alt_yv12->y_buffer + recon_yoffset;
1813        u_buffer[ALTREF_FRAME] = alt_yv12->u_buffer + recon_uvoffset;
1814        v_buffer[ALTREF_FRAME] = alt_yv12->v_buffer + recon_uvoffset;
1815
1816        frame_lf_or_gf[ALTREF_FRAME] = 1;
1817    }
1818
1819    *returnintra = INT_MAX;
1820    cpi->mbs_tested_so_far++;          // Count of the number of MBs tested so far this frame
1821
1822    x->skip = 0;
1823
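        // ref_frame_cost[] approximates the bits needed to signal each reference:
        //  an intra/inter flag, then last vs golden/alt-ref, then golden vs alt-ref.
        //  With only the last frame enabled, the fixed probabilities (255, 128) make
        //  the golden and alt-ref branches prohibitively expensive.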
1824    ref_frame_cost[INTRA_FRAME]   = vp8_cost_zero(cpi->prob_intra_coded);
1825
1826    // Special case treatment when GF and ARF are not sensible options for reference
1827    if (cpi->ref_frame_flags == VP8_LAST_FLAG)
1828    {
1829        ref_frame_cost[LAST_FRAME]    = vp8_cost_one(cpi->prob_intra_coded)
1830                                        + vp8_cost_zero(255);
1831        ref_frame_cost[GOLDEN_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
1832                                        + vp8_cost_one(255)
1833                                        + vp8_cost_zero(128);
1834        ref_frame_cost[ALTREF_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
1835                                        + vp8_cost_one(255)
1836                                        + vp8_cost_one(128);
1837    }
1838    else
1839    {
1840        ref_frame_cost[LAST_FRAME]    = vp8_cost_one(cpi->prob_intra_coded)
1841                                        + vp8_cost_zero(cpi->prob_last_coded);
1842        ref_frame_cost[GOLDEN_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
1843                                        + vp8_cost_one(cpi->prob_last_coded)
1844                                        + vp8_cost_zero(cpi->prob_gf_coded);
1845        ref_frame_cost[ALTREF_FRAME]  = vp8_cost_one(cpi->prob_intra_coded)
1846                                        + vp8_cost_one(cpi->prob_last_coded)
1847                                        + vp8_cost_one(cpi->prob_gf_coded);
1848    }
1849
1850    vpx_memset(mode_mv, 0, sizeof(mode_mv));
1851
1852    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
1853    vp8_rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion);
1854    uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
1855
1856    for (mode_index = 0; mode_index < MAX_MODES; mode_index++)
1857    {
1858        int this_rd = INT_MAX;
1859        int lf_or_gf = 0;           // Last Frame (0) or gf/arf (1)
1860        int disable_skip = 0;
1861        int other_cost = 0;
1862
1863        force_no_skip = 0;
1864
1865        // Experimental debug code.
1866        // Record of rd values recorded for this MB. -1 indicates not measured
1867        //all_rds[mode_index] = -1;
1868        //all_rates[mode_index] = -1;
1869        //all_dist[mode_index] = -1;
1870        //intermodecost[mode_index] = -1;
1871
1872        // Test best rd so far against threshold for trying this mode.
1873        if (best_rd <= cpi->rd_threshes[mode_index])
1874            continue;
1875
1876        // These variables hold the rolling total cost and distortion for this mode
1877        rate2 = 0;
1878        distortion2 = 0;
1879
1880        this_mode = vp8_mode_order[mode_index];
1881
1882        x->e_mbd.mode_info_context->mbmi.mode = this_mode;
1883        x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
1884        x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index];
1885
1886        // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
1887        // unless ARNR filtering is enabled in which case we want
1888        // an unfiltered alternative
1889        if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
1890        {
1891            if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
1892                continue;
1893        }
1894
1895        /* everything but intra */
1896        if (x->e_mbd.mode_info_context->mbmi.ref_frame)
1897        {
1898            x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
1899            x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
1900            x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
1901            mode_mv[NEARESTMV] = frame_nearest_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
1902            mode_mv[NEARMV] = frame_near_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
1903            best_ref_mv = frame_best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
1904            vpx_memcpy(mdcounts, frame_mdcounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts));
1905            lf_or_gf = frame_lf_or_gf[x->e_mbd.mode_info_context->mbmi.ref_frame];
1906        }
1907
1908        if(x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
1909        {
1910            if(!saddone)
1911            {
1912                vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] );
1913                saddone = 1;
1914            }
1915
1916            vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
1917                        x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
1918
1919            /* adjust mvp to make sure it is within MV range */
1920            if(mvp.row > best_ref_mv.row + MAX_FULL_PEL_VAL)
1921                mvp.row = best_ref_mv.row + MAX_FULL_PEL_VAL;
1922            else if(mvp.row < best_ref_mv.row - MAX_FULL_PEL_VAL)
1923                mvp.row = best_ref_mv.row - MAX_FULL_PEL_VAL;
1924            if(mvp.col > best_ref_mv.col + MAX_FULL_PEL_VAL)
1925                mvp.col = best_ref_mv.col + MAX_FULL_PEL_VAL;
1926            else if(mvp.col < best_ref_mv.col - MAX_FULL_PEL_VAL)
1927                mvp.col = best_ref_mv.col - MAX_FULL_PEL_VAL;
1928        }
1929
1930        // Check to see if the testing frequency for this mode is at its max
1931        // If so then prevent it from being tested and increase the threshold for its testing
1932        if (cpi->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1))
1933        {
1934            if (cpi->mbs_tested_so_far  <= cpi->mode_check_freq[mode_index] * cpi->mode_test_hit_counts[mode_index])
1935            {
1936                // Increase the threshold for coding this mode to make it less likely to be chosen
1937                cpi->rd_thresh_mult[mode_index] += 4;
1938
1939                if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
1940                    cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
1941
1942                cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
1943
1944                continue;
1945            }
1946        }
1947
1948        // We have now decided to test the current mode, so increment the counter for the number of times it has been tested
1949        cpi->mode_test_hit_counts[mode_index] ++;
1950
1951        // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to suppress noise
1952        if (cpi->zbin_mode_boost_enabled)
1953        {
1954            if ( vp8_ref_frame_order[mode_index] == INTRA_FRAME )
1955                cpi->zbin_mode_boost = 0;
1956            else
1957            {
1958                if (vp8_mode_order[mode_index] == ZEROMV)
1959                {
1960                    if (vp8_ref_frame_order[mode_index] != LAST_FRAME)
1961                        cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
1962                    else
1963                        cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
1964                }
1965                else if (vp8_mode_order[mode_index] == SPLITMV)
1966                    cpi->zbin_mode_boost = 0;
1967                else
1968                    cpi->zbin_mode_boost = MV_ZBIN_BOOST;
1969            }
1970
1971            vp8_update_zbin_extra(cpi, x);
1972        }
1973
1974        switch (this_mode)
1975        {
1976        case B_PRED:
1977        {
1978            int tmp_rd;
1979
1980            // Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED];
1981            tmp_rd = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd);
1982            rate2 += rate;
1983            distortion2 += distortion;
1984
1985            if(tmp_rd < best_yrd)
1986            {
1987                rate2 += uv_intra_rate;
1988                rate_uv = uv_intra_rate_tokenonly;
1989                distortion2 += uv_intra_distortion;
1990                distortion_uv = uv_intra_distortion;
1991            }
1992            else
1993            {
1994                this_rd = INT_MAX;
1995                disable_skip = 1;
1996            }
1997        }
1998        break;
1999
2000        case SPLITMV:
2001        {
2002            int tmp_rd;
2003            int this_rd_thresh;
2004
2005            this_rd_thresh = (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) ? cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA];
2006            this_rd_thresh = (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) ? cpi->rd_threshes[THR_NEWG]: this_rd_thresh;
2007
2008            tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
2009                                                     best_yrd, mdcounts,
2010                                                     &rate, &rate_y, &distortion, this_rd_thresh) ;
2011
2012            rate2 += rate;
2013            distortion2 += distortion;
2014
2015            // If even the 'Y' rd value of split is higher than the best so far then don't bother looking at UV
2016            if (tmp_rd < best_yrd)
2017            {
2018                // Now work out UV cost and add it in
2019                vp8_rd_inter_uv(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel);
2020                rate2 += rate_uv;
2021                distortion2 += distortion_uv;
2022            }
2023            else
2024            {
2025                this_rd = INT_MAX;
2026                disable_skip = 1;
2027            }
2028        }
2029        break;
2030        case DC_PRED:
2031        case V_PRED:
2032        case H_PRED:
2033        case TM_PRED:
2034            x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
2035            RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
2036                (&x->e_mbd);
2037            macro_block_yrd(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd.encodemb)) ;
2038            rate2 += rate_y;
2039            distortion2 += distortion;
2040            rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
2041            rate2 += uv_intra_rate;
2042            rate_uv = uv_intra_rate_tokenonly;
2043            distortion2 += uv_intra_distortion;
2044            distortion_uv = uv_intra_distortion;
2045            break;
2046
2047        case NEWMV:
2048
2049            // Decrement full search counter
2050            if (cpi->check_freq[lf_or_gf] > 0)
2051                cpi->check_freq[lf_or_gf] --;
2052
2053            {
2054                int thissme;
2055                int bestsme = INT_MAX;
2056                int step_param = cpi->sf.first_step;
2057                int search_range;
2058                int further_steps;
2059                int n;
2060
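                    // The window limits computed below are in the MV domain
                    //  (best_ref_mv +/- MAX_FULL_PEL_VAL), shifted down by 3 onto the
                    //  full-pel grid used by x->mv_col/row_min/max and the integer
                    //  searches.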
2061                int col_min = (best_ref_mv.col - MAX_FULL_PEL_VAL) >>3;
2062                int col_max = (best_ref_mv.col + MAX_FULL_PEL_VAL) >>3;
2063                int row_min = (best_ref_mv.row - MAX_FULL_PEL_VAL) >>3;
2064                int row_max = (best_ref_mv.row + MAX_FULL_PEL_VAL) >>3;
2065
2066                int tmp_col_min = x->mv_col_min;
2067                int tmp_col_max = x->mv_col_max;
2068                int tmp_row_min = x->mv_row_min;
2069                int tmp_row_max = x->mv_row_max;
2070
2071                // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search.
2072                if (x->mv_col_min < col_min )
2073                    x->mv_col_min = col_min;
2074                if (x->mv_col_max > col_max )
2075                    x->mv_col_max = col_max;
2076                if (x->mv_row_min < row_min )
2077                    x->mv_row_min = row_min;
2078                if (x->mv_row_max > row_max )
2079                    x->mv_row_max = row_max;
2080
2081                //adjust search range according to sr from mv prediction
2082                if(sr > step_param)
2083                    step_param = sr;
2084
2085                // Work out how long a search we should do
2086                search_range = MAXF(abs(best_ref_mv.col), abs(best_ref_mv.row)) >> 3;
2087
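                    // x->vector_range adaptively tracks how large recent best vectors
                    //  have been; it is used later to size the radius of the
                    //  exhaustive full search.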
2088                if (search_range >= x->vector_range)
2089                    x->vector_range = search_range;
2090                else if (x->vector_range > cpi->sf.min_fs_radius)
2091                    x->vector_range--;
2092
2093                // Initial step/diamond search
2094                {
2095                    int sadpb = x->sadperbit16;
2096
2097                    if (cpi->sf.search_method == HEX)
2098                    {
2099                        bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv);
2100                        mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
2101                        mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
2102                    }
2103                    else
2104                    {
2105                        bestsme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb < 9
2106                        mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
2107                        mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
2108
2109                        // Further step/diamond searches as necessary
2110                        n = 0;
2111                        further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2112
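                                // num00, reported by the diamond search, counts the step
                                //  sizes for which the centre stayed best; the loop below
                                //  skips those stages instead of searching them again.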
2113                        n = num00;
2114                        num00 = 0;
2115
2116                        while (n < further_steps)
2117                        {
2118                            n++;
2119
2120                            if (num00)
2121                                num00--;
2122                            else
2123                            {
2124                                thissme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb = 9
2125
2126                                if (thissme < bestsme)
2127                                {
2128                                    bestsme = thissme;
2129                                    mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
2130                                    mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
2131                                }
2132                                else
2133                                {
2134                                    d->bmi.mv.as_mv.row = mode_mv[NEWMV].row;
2135                                    d->bmi.mv.as_mv.col = mode_mv[NEWMV].col;
2136                                }
2137                            }
2138                        }
2139                    }
2140
2141                }
2142
2143                // Should we do a full search
2144                if (!cpi->check_freq[lf_or_gf] || cpi->do_full[lf_or_gf])
2145                {
2146                    int thissme;
2147                    int full_flag_thresh = 0;
2148                    MV full_mvp;
2149
2150                    full_mvp.row = d->bmi.mv.as_mv.row <<3;    // use diamond search result as full search starting point
2151                    full_mvp.col = d->bmi.mv.as_mv.col <<3;
2152
2153                    // Update x->vector_range based on best vector found in step search
2154                    search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col));
2155                    //search_range *= 1.4;  //didn't improve PSNR
2156
2157                    if (search_range > x->vector_range)
2158                        x->vector_range = search_range;
2159                    else
2160                        search_range = x->vector_range;
2161
2162                    // Apply limits
2163                    search_range = (search_range > cpi->sf.max_fs_radius) ? cpi->sf.max_fs_radius : search_range;
2164
2165                    //add this to reduce full search range.
2166                    if(sr<=3 && search_range > 8) search_range = 8;
2167
2168                    {
2169                        int sadpb = x->sadperbit16 >> 2;
2170                        thissme = cpi->full_search_sad(x, b, d, &full_mvp, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, x->mvsadcost,&best_ref_mv);
2171                    }
2172
2173                    // Barrier threshold to initiating full search
2174                    // full_flag_thresh = 10 + (thissme >> 7);
2175                    if ((thissme + full_flag_thresh) < bestsme)
2176                    {
2177                        cpi->do_full[lf_or_gf] ++;
2178                        bestsme = thissme;
2179                    }
2180                    else if (thissme < bestsme)
2181                        bestsme = thissme;
2182                    else
2183                    {
2184                        cpi->do_full[lf_or_gf] = cpi->do_full[lf_or_gf] >> 1;
2185                        cpi->check_freq[lf_or_gf] = cpi->sf.full_freq[lf_or_gf];
2186
2187                        // The full search result is actually worse so re-instate the previous best vector
2188                        d->bmi.mv.as_mv.row = mode_mv[NEWMV].row;
2189                        d->bmi.mv.as_mv.col = mode_mv[NEWMV].col;
2190                    }
2191                }
2192
2193                x->mv_col_min = tmp_col_min;
2194                x->mv_col_max = tmp_col_max;
2195                x->mv_row_min = tmp_row_min;
2196                x->mv_row_max = tmp_row_max;
2197
2198                if (bestsme < INT_MAX)
2199                    // cpi->find_fractional_mv_step(x,b,d,&d->bmi.mv.as_mv,&best_ref_mv,x->errorperbit/2,cpi->fn_ptr.svf,cpi->fn_ptr.vf,x->mvcost);  // normal mvc=11
2200                    cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost);
2201
2202                mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
2203                mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
2204
2205                // Add the new motion vector cost to our rolling cost variable
2206                rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
2207
2208            }
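                // Note: no break. NEWMV falls through to the NEARESTMV/NEARMV clamping
                //  below and then into the shared inter-mode rate/distortion accounting.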
2209
2210        case NEARESTMV:
2211        case NEARMV:
2212
2213            // Clip "next_nearest" so that it does not extend too far out of the image
2214            if (mode_mv[this_mode].col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
2215                mode_mv[this_mode].col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
2216            else if (mode_mv[this_mode].col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
2217                mode_mv[this_mode].col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
2218
2219            if (mode_mv[this_mode].row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
2220                mode_mv[this_mode].row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
2221            else if (mode_mv[this_mode].row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
2222                mode_mv[this_mode].row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
2223
2224            // Do not bother proceeding if the vector (from newmv,nearest or near) is 0,0 as this should then be coded using the zeromv mode.
2225            if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) &&
2226                ((mode_mv[this_mode].row == 0) && (mode_mv[this_mode].col == 0)))
2227                continue;
2228
2229        case ZEROMV:
2230
2231        mv_selected:
2232
2233            // Trap vectors that reach beyond the UMV borders
2234            // Note that ALL New MV, Nearest MV, Near MV and Zero MV code drops through to this point
2235            // because of the lack of break statements in the previous two cases.
2236            if (((mode_mv[this_mode].row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].row >> 3) > x->mv_row_max) ||
2237                ((mode_mv[this_mode].col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].col >> 3) > x->mv_col_max))
2238                continue;
2239
2240            vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
2241            vp8_build_inter_predictors_mby(&x->e_mbd);
2242
2243            if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
2244                x->skip = 1;
2245            }
2246            else if (x->encode_breakout)
2247            {
2248                int sum, sse;
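                    // The breakout threshold below is the square of the Y AC dequantiser
                    //  step scaled by 1/16 (e.g. dequant[1] == 20 gives 20*20/16 == 25),
                    //  floored at the configured encode_breakout value.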
2249                int threshold = (xd->block[0].dequant[1]
2250                            * xd->block[0].dequant[1] >>4);
2251
2252                if(threshold < x->encode_breakout)
2253                    threshold = x->encode_breakout;
2254
2255                VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)
2256                    (x->src.y_buffer, x->src.y_stride,
2257                     x->e_mbd.predictor, 16, (unsigned int *)(&sse), &sum);
2258
2259                if (sse < threshold)
2260                {
2261                    // Check u and v to make sure skip is ok
2262                    int sse2 = 0;
2263                    /* If there is no codeable 2nd order dc
2264                       or a very small uniform pixel change */
2265                    if (abs(sum) < (xd->block[24].dequant[0]<<2)||
2266                        ((sum * sum>>8) > sse && abs(sum) <128))
2267                    {
2268                        sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance));
2269
2270                        if (sse2 * 2 < threshold)
2271                        {
2272                            x->skip = 1;
2273                            distortion2 = sse + sse2;
2274                            rate2 = 500;
2275
2276                            /* for best_yrd calculation */
2277                            rate_uv = 0;
2278                            distortion_uv = sse2;
2279
2280                            disable_skip = 1;
2281                            this_rd = RDCOST(x->rdmult, x->rddiv, rate2,
2282                                             distortion2);
2283
2284                            break;
2285                        }
2286                    }
2287                }
2288            }
2289
2290
2291            //intermodecost[mode_index] = vp8_cost_mv_ref(this_mode, mdcounts);   // Experimental debug code
2292
2293            // Add in the Mv/mode cost
2294            rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
2295
2296            // Y cost and distortion
2297            macro_block_yrd(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd.encodemb));
2298            rate2 += rate_y;
2299            distortion2 += distortion;
2300
2301            // UV cost and distortion
2302            vp8_rd_inter_uv(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel);
2303            rate2 += rate_uv;
2304            distortion2 += distortion_uv;
2305            break;
2306
2307        default:
2308            break;
2309        }
2310
2311        // Where skip is allowable, add in the default per-MB cost for the no-skip case.
2312        // Where we then decide to skip, we have to remove this and replace it with the
2313        // cost of signalling a skip
2314        if (cpi->common.mb_no_coeff_skip)
2315        {
2316            other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
2317            rate2 += other_cost;
2318        }
2319
2320        // Estimate the reference frame signaling cost and add it to the rolling cost variable.
2321        rate2 += ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
2322
2323        if (!disable_skip)
2324        {
2325            // Test for the condition where the skip block will be activated because there are no non-zero coefficients, and make any necessary rate adjustment
2326            if (cpi->common.mb_no_coeff_skip)
2327            {
2328                int tteob;
2329
2330                tteob = 0;
2331
2332                for (i = 0; i <= 24; i++)
2333                {
2334                    tteob += x->e_mbd.block[i].eob;
2335                }
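                    // tteob now holds the total end-of-block count over all 25 blocks
                    //  (16 luma, 4+4 chroma and the second-order Y2 block); zero means
                    //  nothing would be coded and the skip flag can be used.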
2336
2337                if (tteob == 0)
2338                {
2339                    rate2 -= (rate_y + rate_uv);
2340                    //for best_yrd calculation
2341                    rate_uv = 0;
2342
2343                    // Back out no skip flag costing and add in skip flag costing
2344                    if (cpi->prob_skip_false)
2345                    {
2346                        int prob_skip_cost;
2347
2348                        prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
2349                        prob_skip_cost -= vp8_cost_bit(cpi->prob_skip_false, 0);
2350                        rate2 += prob_skip_cost;
2351                        other_cost += prob_skip_cost;
2352                    }
2353                }
2354            }
2355            // Calculate the final RD estimate for this mode
2356            this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
2357        }
2358
2359        // Experimental debug code.
2360        //all_rds[mode_index] = this_rd;
2361        //all_rates[mode_index] = rate2;
2362        //all_dist[mode_index] = distortion2;
2363
2364        if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)  && (this_rd < *returnintra))
2365        {
2366            *returnintra = this_rd ;
2367        }
2368
2369        // Did this mode help, i.e. is it the new best mode?
2370        if (this_rd < best_rd || x->skip)
2371        {
2372            // Note index of best mode so far
2373            best_mode_index = mode_index;
2374            x->e_mbd.mode_info_context->mbmi.force_no_skip = force_no_skip;
2375
2376            if (this_mode <= B_PRED)
2377            {
2378                x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
2379            }
2380
2381            other_cost += ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
2382
2383            /* Calculate the final y RD estimate for this mode */
2384            best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2-rate_uv-other_cost),
2385                              (distortion2-distortion_uv));
2386
2387            *returnrate = rate2;
2388            *returndistortion = distortion2;
2389            best_rd = this_rd;
2390            vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
2391            vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
2392
2393            for (i = 0; i < 16; i++)
2394            {
2395                vpx_memcpy(&best_bmodes[i], &x->e_mbd.block[i].bmi, sizeof(B_MODE_INFO));
2396            }
2397
2398            // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time
2399            cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
2400            cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
2401        }
2402
2403        // If the mode did not help improve the best error case then raise the threshold for testing that mode next time around.
2404        else
2405        {
2406            cpi->rd_thresh_mult[mode_index] += 4;
2407
2408            if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
2409                cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
2410
2411            cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
2412        }
2413
2414        if (x->skip)
2415            break;
2416
2417    }
2418
2419    // Reduce the activation RD thresholds for the best choice mode
2420    if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2)))
2421    {
2422        int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
2423
2424        cpi->rd_thresh_mult[best_mode_index] = (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
2425        cpi->rd_threshes[best_mode_index] = (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];
2426
2427        // If we chose a split mode then reset the new MV thresholds as well
2428        /*if ( vp8_mode_order[best_mode_index] == SPLITMV )
2429        {
2430            best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWMV] >> 4);
2431            cpi->rd_thresh_mult[THR_NEWMV] = (cpi->rd_thresh_mult[THR_NEWMV] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWMV]-best_adjustment: MIN_THRESHMULT;
2432            cpi->rd_threshes[THR_NEWMV] = (cpi->rd_baseline_thresh[THR_NEWMV] >> 7) * cpi->rd_thresh_mult[THR_NEWMV];
2433
2434            best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWG] >> 4);
2435            cpi->rd_thresh_mult[THR_NEWG] = (cpi->rd_thresh_mult[THR_NEWG] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWG]-best_adjustment: MIN_THRESHMULT;
2436            cpi->rd_threshes[THR_NEWG] = (cpi->rd_baseline_thresh[THR_NEWG] >> 7) * cpi->rd_thresh_mult[THR_NEWG];
2437
2438            best_adjustment = 4; //(cpi->rd_thresh_mult[THR_NEWA] >> 4);
2439            cpi->rd_thresh_mult[THR_NEWA] = (cpi->rd_thresh_mult[THR_NEWA] >= (MIN_THRESHMULT+best_adjustment)) ? cpi->rd_thresh_mult[THR_NEWA]-best_adjustment: MIN_THRESHMULT;
2440            cpi->rd_threshes[THR_NEWA] = (cpi->rd_baseline_thresh[THR_NEWA] >> 7) * cpi->rd_thresh_mult[THR_NEWA];
2441        }*/
2442
2443    }
2444
2445    // If we have chosen new mv or split then decay the full search check count more quickly.
2446    if ((vp8_mode_order[best_mode_index] == NEWMV) || (vp8_mode_order[best_mode_index] == SPLITMV))
2447    {
2448        int lf_or_gf = (vp8_ref_frame_order[best_mode_index] == LAST_FRAME) ? 0 : 1;
2449
2450        if (cpi->check_freq[lf_or_gf] && !cpi->do_full[lf_or_gf])
2451        {
2452            cpi->check_freq[lf_or_gf] --;
2453        }
2454    }
2455
2456    // Keep a record of best mode index that we chose
2457    cpi->last_best_mode_index = best_mode_index;
2458
2459    // Note how often each mode is chosen as best
2460    cpi->mode_chosen_counts[best_mode_index] ++;
2461
2462
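        // When the frame being coded is the alt-ref source frame itself, override the
        //  RD choice and force ZEROMV against ALTREF (with coefficients skipped where
        //  allowed), so the macroblock is effectively copied from the alt-ref buffer.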
2463    if (cpi->is_src_frame_alt_ref && (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME))
2464    {
2465        best_mbmode.mode = ZEROMV;
2466        best_mbmode.ref_frame = ALTREF_FRAME;
2467        best_mbmode.mv.as_int = 0;
2468        best_mbmode.uv_mode = 0;
2469        best_mbmode.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
2470        best_mbmode.partitioning = 0;
2471        best_mbmode.dc_diff = 0;
2472
2473        vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
2474        vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
2475
2476        for (i = 0; i < 16; i++)
2477        {
2478            vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO));
2479        }
2480
2481        x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
2482
2483        return best_rd;
2484    }
2485
2486
2487    if(best_mbmode.mode <= B_PRED)
2488    {
2489        int i;
2490        for (i = 0; i < 16; i++)
2491        {
2492            best_bmodes[i].mv.as_int = 0;
2493        }
2494    }
2495
2496    // macroblock modes
2497    vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
2498    vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
2499
2500    for (i = 0; i < 16; i++)
2501    {
2502        vpx_memcpy(&x->e_mbd.block[i].bmi, &best_bmodes[i], sizeof(B_MODE_INFO));
2503    }
2504
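        // The MB-level MV is copied from the bottom-right sub-block (block 15): for
        //  SPLITMV this is the representative vector seen by neighbouring MV
        //  prediction; for whole-MB modes every sub-block carries the same vector.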
2505    x->e_mbd.mode_info_context->mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
2506
2507    return best_rd;
2508}
2509#endif
2510