1/******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*/
20
21/**
22 *******************************************************************************
23 * @file
24 *  ih264e_me.c
25 *
26 * @brief
27 *  Contains definition of functions for motion estimation
28 *
29 * @author
30 *  ittiam
31 *
32 * @par List of Functions:
33 *  - ih264e_init_mv_bits()
34 *  - ih264e_skip_analysis_chroma()
35 *  - ih264e_skip_analysis_luma()
36 *  - ih264e_analyse_skip()
37 *  - ih264e_get_search_candidates()
38 *  - ih264e_find_skip_motion_vector()
39 *  - ih264e_get_mv_predictor()
40 *  - ih264e_mv_pred()
41 *  - ih264e_mv_pred_me()
42 *  - ih264e_init_me()
43 *  - ih264e_compute_me()
44 *  - ih264e_compute_me_nmb()
45 *
46 * @remarks
47 *  None
48 *
49 *******************************************************************************
50 */
51
52/*****************************************************************************/
53/* File Includes                                                             */
54/*****************************************************************************/
55
56/* System include files */
57#include <stdio.h>
58#include <assert.h>
59#include <limits.h>
60
61/* User include files */
62#include "ih264_typedefs.h"
63#include "iv2.h"
64#include "ive2.h"
65#include "ithread.h"
66#include "ih264_platform_macros.h"
67#include "ih264_defs.h"
68#include "ime_defs.h"
69#include "ime_distortion_metrics.h"
70#include "ime_structs.h"
71#include "ih264_structs.h"
72#include "ih264_trans_quant_itrans_iquant.h"
73#include "ih264_inter_pred_filters.h"
74#include "ih264_mem_fns.h"
75#include "ih264_padding.h"
76#include "ih264_intra_pred_filters.h"
77#include "ih264_deblk_edge_filters.h"
78#include "ih264_cabac_tables.h"
79#include "ih264e_defs.h"
80#include "ih264e_error.h"
81#include "ih264e_bitstream.h"
82#include "irc_cntrl_param.h"
83#include "irc_frame_info_collector.h"
84#include "ih264e_rate_control.h"
85#include "ih264e_cabac_structs.h"
86#include "ih264e_structs.h"
87#include "ih264e_globals.h"
88#include "ih264_macros.h"
89#include "ih264e_me.h"
90#include "ime.h"
91#include "ih264_debug.h"
92#include "ih264e_intra_modes_eval.h"
93#include "ih264e_core_coding.h"
94#include "ih264e_mc.h"
95#include "ih264e_debug.h"
96#include "ih264e_half_pel.h"
97#include "ime_statistics.h"
98#include "ih264e_platform_macros.h"
99
100
101/*****************************************************************************/
102/* Function Definitions                                                      */
103/*****************************************************************************/
104
105/**
106*******************************************************************************
107*
108* @brief
109*  This function populates the length of the codewords for motion vectors in the
110*  range (-search range, search range) in pixels
111*
112* @param[in] ps_me
113*  Pointer to me ctxt
114*
115* @param[out] pu1_mv_bits
116*  length of the codeword for all mv's
117*
118* @remarks The length of the code words are derived from signed exponential
119* goloumb codes.
120*
121*******************************************************************************
122*/
123void ih264e_init_mv_bits(me_ctxt_t *ps_me_ctxt)
124{
125    /* temp var */
126    WORD32 i, codesize = 3, diff, limit;
127    UWORD32 u4_code_num, u4_range;
128    UWORD32 u4_uev_min, u4_uev_max, u4_sev_min, u4_sev_max;
129
130    /* max srch range */
131    diff = MAX(DEFAULT_MAX_SRCH_RANGE_X, DEFAULT_MAX_SRCH_RANGE_Y);
132    /* sub pel */
133    diff <<= 2;
134    /* delta mv */
135    diff <<= 1;
136
137    /* codeNum for positive integer     =  2x-1     : Table9-3  */
138    u4_code_num = (diff << 1);
139
140    /* get range of the bit string and put using put_bits()                 */
141    GETRANGE(u4_range, u4_code_num);
142
143    limit = 2*u4_range - 1;
144
145    /* init mv bits */
146    ps_me_ctxt->pu1_mv_bits[0] = 1;
147
148    while (codesize < limit)
149    {
150        u4_uev_min = (1 << (codesize >> 1));
151        u4_uev_max = 2*u4_uev_min - 1;
152
153        u4_sev_min = u4_uev_min >> 1;
154        u4_sev_max = u4_uev_max >> 1;
155
156        DEBUG("\n%d min, %d max %d codesize", u4_sev_min, u4_sev_max, codesize);
157
158        for (i = u4_sev_min; i <= (WORD32)u4_sev_max; i++)
159        {
160            ps_me_ctxt->pu1_mv_bits[-i] = ps_me_ctxt->pu1_mv_bits[i] = codesize;
161        }
162
163        codesize += 2;
164    }
165}
166
167
168
169/**
170*******************************************************************************
171*
172* @brief Determines the valid candidates for which the initial search shall happen.
173* The best of these candidates is used to center the diamond pixel search.
174*
175* @par Description: The function sends the skip, (0,0), left, top and top-right
176* neighbouring MBs MVs. The left, top and top-right MBs MVs are used because
177* these are the same MVs that are used to form the MV predictor. This initial MV
178* search candidates need not take care of slice boundaries and hence neighbor
179* availability checks are not made here.
180*
181* @param[in] ps_left_mb_pu
182*  pointer to left mb motion vector info
183*
184* @param[in] ps_top_mb_pu
185*  pointer to top & top right mb motion vector info
186*
187* @param[in] ps_top_left_mb_pu
188*  pointer to top left mb motion vector info
189*
190* @param[out] ps_skip_mv
191*  pointer to skip motion vectors for the curr mb
192*
193* @param[in] i4_mb_x
194*  mb index x
195*
196* @param[in] i4_mb_y
197*  mb index y
198*
199* @param[in] i4_wd_mbs
200*  pic width in mbs
201*
202* @param[in] ps_motionEst
203*  pointer to me context
204*
205* @returns  The list of MVs to be used of priming the full pel search and the
206* number of such MVs
207*
208* @remarks
209*   Assumptions : 1. Assumes Only partition of size 16x16
210*
211*******************************************************************************
212*/
213static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
214                                         me_ctxt_t *ps_me_ctxt,
215                                         WORD32 i4_reflist)
216{
217    /* curr mb indices */
218    WORD32 i4_mb_x = ps_proc->i4_mb_x;
219
220    /* Motion vector */
221    mv_t *ps_left_mv, *ps_top_mv, *ps_top_left_mv, *ps_top_right_mv;
222
223    /* Pred modes */
224    WORD32 i4_left_mode, i4_top_mode, i4_top_left_mode, i4_top_right_mode;
225
226    /* mb part info */
227    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
228
229    /* mvs */
230    WORD32 mvx, mvy;
231
232    /* ngbr availability */
233    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
234
235    /* Current mode */
236    WORD32 i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
237
238    /* srch range*/
239    WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
240    WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
241    WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
242    WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
243
244    /* num of candidate search candidates */
245    UWORD32 u4_num_candidates = 0;
246
247    ps_left_mv = &ps_proc->s_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
248    ps_top_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x)->s_me_info[i4_reflist].s_mv;
249    ps_top_left_mv = &ps_proc->s_top_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
250    ps_top_right_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->s_me_info[i4_reflist].s_mv;
251
252    i4_left_mode = ps_proc->s_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
253    i4_top_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x)->b2_pred_mode != i4_cmpl_predmode;
254    i4_top_left_mode = ps_proc->s_top_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
255    i4_top_right_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->b2_pred_mode != i4_cmpl_predmode;
256
257    /* Taking the Zero motion vector as one of the candidates   */
258    ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = 0;
259    ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = 0;
260
261    u4_num_candidates++;
262
263    /* Taking the Left MV Predictor as one of the candidates    */
264    if (ps_ngbr_avbl->u1_mb_a && i4_left_mode)
265    {
266        mvx      = (ps_left_mv->i2_mvx + 2) >> 2;
267        mvy      = (ps_left_mv->i2_mvy + 2) >> 2;
268
269        mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
270        mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
271
272        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
273        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
274
275        u4_num_candidates ++;
276    }
277
278    /* Taking the Top MV Predictor as one of the candidates     */
279    if (ps_ngbr_avbl->u1_mb_b && i4_top_mode)
280    {
281        mvx      = (ps_top_mv->i2_mvx + 2) >> 2;
282        mvy      = (ps_top_mv->i2_mvy + 2) >> 2;
283
284        mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
285        mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
286
287        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
288        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
289
290        u4_num_candidates ++;
291
292        /* Taking the TopRt MV Predictor as one of the candidates   */
293        if (ps_ngbr_avbl->u1_mb_c && i4_top_right_mode)
294        {
295            mvx      = (ps_top_right_mv->i2_mvx + 2) >> 2;
296            mvy      = (ps_top_right_mv->i2_mvy + 2)>> 2;
297
298            mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
299            mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
300
301            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
302            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
303
304            u4_num_candidates ++;
305        }
306        /* Taking the TopLt MV Predictor as one of the candidates   */
307        else if(ps_ngbr_avbl->u1_mb_d && i4_top_left_mode)
308        {
309            mvx      = (ps_top_left_mv->i2_mvx + 2) >> 2;
310            mvy      = (ps_top_left_mv->i2_mvy + 2) >> 2;
311
312            mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
313            mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
314
315            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
316            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
317
318            u4_num_candidates ++;
319        }
320    }
321
322
323    /********************************************************************/
324    /*                            MV Prediction                         */
325    /********************************************************************/
326    ih264e_mv_pred_me(ps_proc, i4_reflist);
327
328    ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvx;
329    ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvy;
330
331    /* Get the skip motion vector                               */
332    {
333        ps_me_ctxt->i4_skip_type = ps_proc->ps_codec->apf_find_skip_params_me
334                                    [ps_proc->i4_slice_type](ps_proc, i4_reflist);
335
336        /* Taking the Skip motion vector as one of the candidates   */
337        mvx = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvx + 2) >> 2;
338        mvy = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvy + 2) >> 2;
339
340        mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
341        mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
342
343        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
344        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
345        u4_num_candidates++;
346
347        if (ps_proc->i4_slice_type == BSLICE)
348        {
349            /* Taking the temporal Skip motion vector as one of the candidates   */
350            mvx = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvx + 2) >> 2;
351            mvy = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvy + 2) >> 2;
352
353            mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
354            mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
355
356            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
357            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
358            u4_num_candidates++;
359        }
360    }
361
362    ASSERT(u4_num_candidates <= 6);
363
364    ps_me_ctxt->u4_num_candidates[i4_reflist] = u4_num_candidates;
365}
366
367/**
368*******************************************************************************
369*
370* @brief The function computes parameters for a PSKIP MB
371*
372* @par Description:
373*  The function updates the skip motion vector and checks if the current
374*  MB can be a skip PSKIP mB or not
375*
376* @param[in] ps_proc
377*  Pointer to process context
378*
379* @param[in] u4_for_me
380*  Flag to indicate function is called for ME or not
381*
382* @param[out] i4_ref_list
383*  Current active refernce list
384*
385* @returns Flag indicating if the current MB can be marked as skip
386*
387* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
388*   specification.
389*
390*******************************************************************************
391*/
392WORD32 ih264e_find_pskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
393{
394    /* left mb motion vector */
395    enc_pu_t *ps_left_mb_pu ;
396
397    /* top mb motion vector */
398    enc_pu_t *ps_top_mb_pu ;
399
400    /* Skip mv */
401    mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
402
403    UNUSED(i4_reflist);
404
405    ps_left_mb_pu = &ps_proc->s_left_mb_pu ;
406    ps_top_mb_pu = ps_proc->ps_top_row_pu + ps_proc->i4_mb_x;
407
408    if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
409        (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
410        (
411          (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
412          (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
413          (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
414       ) ||
415       (
416          (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
417          (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
418          (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
419       )
420     )
421
422    {
423        ps_skip_mv->i2_mvx = 0;
424        ps_skip_mv->i2_mvy = 0;
425    }
426    else
427    {
428        ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
429        ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
430    }
431
432    if ( (ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx == ps_skip_mv->i2_mvx)
433     && (ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy == ps_skip_mv->i2_mvy))
434    {
435        return 1;
436    }
437
438    return 0;
439}
440
441/**
442*******************************************************************************
443*
444* @brief The function computes parameters for a PSKIP MB
445*
446* @par Description:
447*  The function updates the skip motion vector and checks if the current
448*  MB can be a skip PSKIP mB or not
449*
450* @param[in] ps_proc
451*  Pointer to process context
452*
453* @param[in] u4_for_me
454*  Flag to dincate fucntion is called for ME or not
455*
456* @param[out] i4_ref_list
457*  Current active refernce list
458*
459* @returns Flag indicating if the current MB can be marked as skip
460*
461* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
462*   specification.
463*
464*******************************************************************************
465*/
466WORD32 ih264e_find_pskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
467{
468    /* left mb motion vector */
469    enc_pu_t *ps_left_mb_pu ;
470
471    /* top mb motion vector */
472    enc_pu_t *ps_top_mb_pu ;
473
474    /* Skip mv */
475    mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
476
477    UNUSED(i4_reflist);
478
479    ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
480    ps_top_mb_pu = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
481
482    if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
483        (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
484        (
485          (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
486          (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
487          (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
488        ) ||
489        (
490          (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
491          (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
492          (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
493        )
494     )
495
496    {
497        ps_skip_mv->i2_mvx = 0;
498        ps_skip_mv->i2_mvy = 0;
499    }
500    else
501    {
502        ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
503        ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
504    }
505
506    return PRED_L0;
507}
508
509
510/**
511*******************************************************************************
512*
513* @brief motion vector predictor
514*
515* @par Description:
516*  The routine calculates the motion vector predictor for a given block,
517*  given the candidate MV predictors.
518*
519* @param[in] ps_left_mb_pu
520*  pointer to left mb motion vector info
521*
522* @param[in] ps_top_row_pu
523*  pointer to top & top right mb motion vector info
524*
525* @param[out] ps_pred_mv
526*  pointer to candidate predictors for the current block
527*
528* @returns  The x & y components of the MV predictor.
529*
530* @remarks The code implements the logic as described in sec 8.4.1.3 in H264
531*   specification.
532*   Assumptions : 1. Assumes Single reference frame
533*                 2. Assumes Only partition of size 16x16
534*
535*******************************************************************************
536*/
537void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu,
538                             enc_pu_t *ps_top_row_pu,
539                             enc_pu_mv_t *ps_pred_mv,
540                             WORD32 i4_ref_list)
541{
542
543    /* Indicated the current ref */
544    WORD8 i1_ref_idx;
545
546    /* For pred L0 */
547    i1_ref_idx = -1;
548    {
549        /* temp var */
550        WORD32 pred_algo = 3, a, b, c;
551
552        /* If only one of the candidate blocks has a reference frame equal to
553         * the current block then use the same block as the final predictor */
554        a = (ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
555        b = (ps_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
556        c = (ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
557
558        if (a == 0 && b == -1 && c == -1)
559            pred_algo = 0; /* LEFT */
560        else if(a == -1 && b == 0 && c == -1)
561            pred_algo = 1; /* TOP */
562        else if(a == -1 && b == -1 && c == 0)
563            pred_algo = 2; /* TOP RIGHT */
564
565        switch (pred_algo)
566        {
567            case 0:
568                /* left */
569                ps_pred_mv->s_mv.i2_mvx = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx;
570                ps_pred_mv->s_mv.i2_mvy = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy;
571                break;
572            case 1:
573                /* top */
574                ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx;
575                ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy;
576                break;
577            case 2:
578                /* top right */
579                ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx;
580                ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy;
581                break;
582            case 3:
583                /* median */
584                MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx,
585                       ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx,
586                       ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx,
587                       ps_pred_mv->s_mv.i2_mvx);
588                MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy,
589                       ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy,
590                       ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy,
591                       ps_pred_mv->s_mv.i2_mvy);
592
593                break;
594            default:
595                break;
596        }
597    }
598}
599
600/**
601*******************************************************************************
602*
603* @brief This function performs MV prediction
604*
605* @par Description:
606*
607* @param[in] ps_proc
608*  Process context corresponding to the job
609*
610* @returns  none
611*
612* @remarks none
613*  This function will update the MB availability since intra inter decision
614*  should be done before the call
615*
616*******************************************************************************
617*/
618void ih264e_mv_pred(process_ctxt_t *ps_proc, WORD32 i4_slice_type)
619{
620
621    /* left mb motion vector */
622    enc_pu_t *ps_left_mb_pu;
623
624    /* top left mb motion vector */
625    enc_pu_t *ps_top_left_mb_pu;
626
627    /* top row motion vector info */
628    enc_pu_t *ps_top_row_pu;
629
630    /* predicted motion vector */
631    enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
632
633    /* zero mv */
634    mv_t zero_mv = { 0, 0 };
635
636    /*  mb neighbor availability */
637    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
638
639    /* mb syntax elements of neighbors */
640    mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
641    mb_info_t *ps_top_left_syn;
642    UWORD32 u4_left_is_intra;
643
644    /* Temp var */
645    WORD32 i4_reflist, max_reflist, i4_cmpl_predmode;
646
647    ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ele);
648    u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
649    ps_left_mb_pu = &ps_proc->s_left_mb_pu;
650    ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
651    ps_top_row_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
652
653    /* Number of ref lists to process */
654    max_reflist = (i4_slice_type == PSLICE) ? 1 : 2;
655
656    for (i4_reflist = 0; i4_reflist < max_reflist; i4_reflist++)
657    {
658        i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
659
660        /* Before performing mv prediction prepare the ngbr information and
661         * reset motion vectors basing on their availability */
662        if (!ps_ngbr_avbl->u1_mb_a || (u4_left_is_intra == 1)
663                        || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
664        {
665            /* left mv */
666            ps_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx = 0;
667            ps_left_mb_pu->s_me_info[i4_reflist].s_mv = zero_mv;
668        }
669        if (!ps_ngbr_avbl->u1_mb_b || ps_top_syn->u2_is_intra
670                        || (ps_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
671        {
672            /* top mv */
673            ps_top_row_pu[0].s_me_info[i4_reflist].i1_ref_idx = 0;
674            ps_top_row_pu[0].s_me_info[i4_reflist].s_mv = zero_mv;
675        }
676
677        if (!ps_ngbr_avbl->u1_mb_c)
678        {
679            /* top right mv - When top right partition is not available for
680             * prediction if top left is available use it for prediction else
681             * set the mv information to -1 and (0, 0)
682             * */
683            if (!ps_ngbr_avbl->u1_mb_d || ps_top_left_syn->u2_is_intra
684                            || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
685            {
686                ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
687                ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
688            }
689            else
690            {
691                ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = ps_top_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx;
692                ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = ps_top_left_mb_pu->s_me_info[i4_reflist].s_mv;
693            }
694        }
695        else if(ps_top_syn[1].u2_is_intra
696                        || (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode))
697        {
698            ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
699            ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
700        }
701
702        ih264e_get_mv_predictor(ps_left_mb_pu, ps_top_row_pu, &ps_pred_mv[i4_reflist], i4_reflist);
703    }
704
705}
706
707/**
708*******************************************************************************
709*
710* @brief This function approximates Pred. MV
711*
712* @par Description:
713*
714* @param[in] ps_proc
715*  Process context corresponding to the job
716*
717* @returns  none
718*
719* @remarks none
720*  Motion estimation happens at nmb level. For cost calculations, mv is appro
721*  ximated using this function
722*
723*******************************************************************************
724*/
725void ih264e_mv_pred_me(process_ctxt_t *ps_proc, WORD32 i4_ref_list)
726{
727    /* left mb motion vector */
728    enc_pu_t *ps_left_mb_pu ;
729
730    /* top left mb motion vector */
731    enc_pu_t *ps_top_left_mb_pu ;
732
733    /* top row motion vector info */
734    enc_pu_t *ps_top_row_pu;
735
736    enc_pu_t s_top_row_pu[2];
737
738    /* predicted motion vector */
739    enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
740
741    /* zero mv */
742    mv_t zero_mv = {0, 0};
743
744    /* Complementary pred mode */
745    WORD32 i4_cmpl_predmode = (i4_ref_list == 0) ? PRED_L1 : PRED_L0;
746
747    /*  mb neighbor availability */
748    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
749
750    ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
751    ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
752    ps_top_row_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);
753
754    s_top_row_pu[0] = ps_top_row_pu[0];
755    s_top_row_pu[1] = ps_top_row_pu[1];
756
757    /*
758     * Before performing mv prediction prepare the ngbr information and
759     * reset motion vectors basing on their availability
760     */
761
762    if (!ps_ngbr_avbl->u1_mb_a || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
763    {
764        /* left mv */
765        ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx = 0;
766        ps_left_mb_pu->s_me_info[i4_ref_list].s_mv = zero_mv;
767    }
768    if (!ps_ngbr_avbl->u1_mb_b || (s_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
769    {
770        /* top mv */
771        s_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx = 0;
772        s_top_row_pu[0].s_me_info[i4_ref_list].s_mv = zero_mv;
773
774    }
775    if (!ps_ngbr_avbl->u1_mb_c)
776    {
777        /* top right mv - When top right partition is not available for
778         * prediction if top left is available use it for prediction else
779         * set the mv information to -1 and (0, 0)
780         * */
781        if (!ps_ngbr_avbl->u1_mb_d || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
782        {
783            s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
784            s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
785
786            s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
787            s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
788        }
789        else
790        {
791            s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = ps_top_left_mb_pu->s_me_info[0].i1_ref_idx;
792            s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = ps_top_left_mb_pu->s_me_info[0].s_mv;
793        }
794    }
795    else if (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode)
796    {
797        ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
798        ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
799    }
800
801    ih264e_get_mv_predictor(ps_left_mb_pu, &(s_top_row_pu[0]),
802                            &ps_pred_mv[i4_ref_list], i4_ref_list);
803}
804
805/**
806*******************************************************************************
807*
808* @brief This function initializes me ctxt
809*
810* @par Description:
811*  Before dispatching the current job to me thread, the me context associated
812*  with the job is initialized.
813*
814* @param[in] ps_proc
815*  Process context corresponding to the job
816*
817* @returns  none
818*
819* @remarks none
820*
821*******************************************************************************
822*/
823void ih264e_init_me(process_ctxt_t *ps_proc)
824{
825    /* me ctxt */
826    me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
827
828    /* codec context */
829    codec_t *ps_codec = ps_proc->ps_codec;
830
831    ps_me_ctxt->i4_skip_bias[BSLICE] = SKIP_BIAS_B;
832
833    if (ps_codec->s_cfg.u4_num_bframes == 0)
834    {
835       ps_me_ctxt->i4_skip_bias[PSLICE] = 4 * SKIP_BIAS_P;
836    }
837    else
838    {
839       ps_me_ctxt->i4_skip_bias[PSLICE] =  SKIP_BIAS_P;
840    }
841
842    /* src ptr */
843    ps_me_ctxt->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma;
844    /* src stride */
845    ps_me_ctxt->i4_src_strd = ps_proc->i4_src_strd;
846
847    /* ref ptrs and corresponding lagrange params */
848    ps_me_ctxt->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma[0];
849    ps_me_ctxt->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma[1];
850
851    ps_me_ctxt->u4_lambda_motion = gu1_qp0[ps_me_ctxt->u1_mb_qp];
852
853
854}
855
856
/**
*******************************************************************************
*
* @brief This function performs motion estimation for the current mb using
*   single reference list
*
* @par Description:
*  The current mb is compared with a list of mb's in the reference frame for
*  least cost. The mb that offers least cost is chosen as predicted mb and the
*  displacement of the predicted mb from index location of the current mb is
*  signaled as mv. The list of the mb's that are chosen in the reference frame
*  are dependent on the speed of the ME configured.
*
*  Only list L0 is evaluated. The sequence is: set up the search range, fetch
*  the seed candidates, cost the skip motion vector, then (unless the minimum
*  SAD was already reached) run the initial candidate evaluation, the full pel
*  search and, when enabled, the half pel search. The cheaper of the skip
*  result and the searched result is kept.
*
* @param[in] ps_proc
*  Process context corresponding to the job
*
* @returns  none. The results are written through ps_proc: the selected L0
*  motion vector, prediction mode, reference list markers and PU geometry go
*  to ps_proc->ps_pu; cost, distortion, mb type and (when reached) the min sad
*  state go to ps_proc->ps_cur_mb; the partition count goes to
*  ps_proc->u4_num_sub_partitions and *ps_proc->pu4_mb_pu_cnt.
*
* @remarks none
*
*******************************************************************************
*/
void ih264e_compute_me_single_reflist(process_ctxt_t *ps_proc)
{
    /* me ctxt */
    me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;

    /* codec context */
    codec_t *ps_codec = ps_proc->ps_codec;

    /* recon stride */
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;

    /* source buffer for half pel generation functions */
    UWORD8 *pu1_hpel_src;

    /* quantization parameters */
    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];

    /* Mb part ctxt used to hold the cost of the SKIP candidate */
    mb_part_ctxt s_skip_mbpart;

    /* Sad thresholds */
    ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;

    {
        WORD32 rows_above, rows_below, columns_left, columns_right;

        /* During evaluation for motion vectors do not search through padded regions */
        /* Obtain number of rows and columns that are effective for computing for me evaluation */
        /* NOTE(review): the above/left extents carry one extra MB_SIZE compared
         * to the below/right extents - confirm this asymmetry is intended */
        rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
        rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
        columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
        columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;

        /* init srch range */
        /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
         * on all sides.
         */
        /* west and north limits are stored as negative displacements */
        ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
        ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
        ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
        ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);

        /* this is to facilitate fast sub pel computation with minimal loads */
        /* every limit is pulled in by one position towards the current mb */
        ps_me_ctxt->i4_srch_range_w += 1;
        ps_me_ctxt->i4_srch_range_e -= 1;
        ps_me_ctxt->i4_srch_range_n += 1;
        ps_me_ctxt->i4_srch_range_s -= 1;
    }

    /* Compute ME and store the MVs */

    /***********************************************************************
     * Compute ME for list L0
     ***********************************************************************/

    /* Init the min sad state for the current list */
    ps_me_ctxt->u4_min_sad_reached  = 0;
    ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;

    /* Get the seed motion vector candidates                    */
    ih264e_get_search_candidates(ps_proc, ps_me_ctxt, PRED_L0);

    /* ****************************************************************
     *Evaluate the SKIP for current list
     * ****************************************************************/
    /* start from the worst possible cost so any computed skip cost replaces it */
    s_skip_mbpart.s_mv_curr.i2_mvx = 0;
    s_skip_mbpart.s_mv_curr.i2_mvy = 0;
    s_skip_mbpart.i4_mb_cost = INT_MAX;
    s_skip_mbpart.i4_mb_distortion = INT_MAX;

    ime_compute_skip_cost( ps_me_ctxt,
                           (ime_mv_t *)(&ps_proc->ps_skip_mv[PRED_L0].s_mv),
                           &s_skip_mbpart,
                           ps_proc->ps_codec->s_cfg.u4_enable_satqd,
                           PRED_L0,
                           0 /* Not a Bslice */ );

    /* same x4 scaling as applied to the full pel search result below */
    s_skip_mbpart.s_mv_curr.i2_mvx <<= 2;
    s_skip_mbpart.s_mv_curr.i2_mvy <<= 2;

    /******************************************************************
     * Evaluate ME For current list
     *****************************************************************/
    ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx = 0;
    ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy = 0;
    ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = INT_MAX;
    ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = INT_MAX;

    /* Init Hpel : NULL means no sub pel buffer has been selected */
    ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf = NULL;

    /* Run the search only if the minimum SAD has not been reached already */
    if (!ps_me_ctxt->u4_min_sad_reached)
    {
        /* Evaluate search candidates for initial mv pt */
        ime_evaluate_init_srchposn_16x16(ps_me_ctxt, PRED_L0);

        /********************************************************************/
        /*                  full pel motion estimation                      */
        /********************************************************************/
        ime_full_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);

        /* Scale the MV to qpel resolution */
        ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx <<= 2;
        ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy <<= 2;

        if (ps_me_ctxt->u4_enable_hpel)
        {
            /* moving src pointer to the converged motion vector location*/
            /* the mv was just scaled by 4, so >> 2 recovers the full pel offset */
            pu1_hpel_src =   ps_me_ctxt->apu1_ref_buf_luma[PRED_L0]
                             + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx >> 2)
                             + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy >> 2)* i4_rec_strd;

            /* sub pel planes : [0] half_x, [1] and [2] are filled by the
             * vertical / 2d filter call below */
            ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
            ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
            ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];

            ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;

            /* half  pel search is done for both sides of full pel,
             * hence half_x of width x height = 17x16 is created
             * starting from left half_x of converged full pel */
            pu1_hpel_src -= 1;

            /* computing half_x */
            ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
                                                  ps_me_ctxt->apu1_subpel_buffs[0],
                                                  i4_rec_strd,
                                                  ps_me_ctxt->u4_subpel_buf_strd);

            /*
             * Halfpel search is done for both sides of full pel,
             * hence half_y of width x height = 16x17 is created
             * starting from top half_y of converged full pel
             * for half_xy top_left is required
             * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
             */
            pu1_hpel_src -= i4_rec_strd;

            /* computing half_y , and half_xy*/
            ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
                            pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
                            ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
                            ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
                            ps_me_ctxt->u4_subpel_buf_strd);

            ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);
        }
    }


    /***********************************************************************
     * If a particular skip Mv is giving better sad, copy to the corresponding
     * MBPART
     * In B slices this loop should go only to PREDL1: If we found min sad
     * we will go to the skip ref list only
     * Have to find a way to make it without too much change or new vars
     **********************************************************************/
    if (s_skip_mbpart.i4_mb_cost < ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost)
    {
        /* skip candidate wins : take over its cost, distortion and mv */
        ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = s_skip_mbpart.i4_mb_cost;
        ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = s_skip_mbpart.i4_mb_distortion;
        ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = s_skip_mbpart.s_mv_curr;
    }
    else if (ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf)
    {
        /* searched result wins and a sub pel buffer was selected :
         * copy that 16x16 buffer to the mb's best sub pel buffer */
        ps_codec->pf_inter_pred_luma_copy(
                        ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf,
                        ps_proc->pu1_best_subpel_buf,
                        ps_me_ctxt->u4_subpel_buf_strd,
                        ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
                        NULL, 0);
    }

    /**********************************************************************
     * Publish the L0 result (the only list evaluated here) to the PU and
     * to the current mb info
     **********************************************************************/
    ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx;
    ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy;
    ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost;
    ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion;
    ps_proc->ps_cur_mb->u4_mb_type = P16x16;
    ps_proc->ps_pu->b2_pred_mode = PRED_L0 ;

    /* Mark the reflists */
    /* NOTE(review): the B skip neighbour checks in this file treat a non zero
     * i1_ref_idx as "list in use", so -1 presumably marks the list as used
     * and 0 as unused - confirm */
    ps_proc->ps_pu->s_me_info[0].i1_ref_idx = -1;
    ps_proc->ps_pu->s_me_info[1].i1_ref_idx =  0;

    /* number of partitions */
    ps_proc->u4_num_sub_partitions = 1;
    *(ps_proc->pu4_mb_pu_cnt) = 1;

    /* position in-terms of PU */
    ps_proc->ps_pu->b4_pos_x = 0;
    ps_proc->ps_pu->b4_pos_y = 0;

    /* PU size */
    /* NOTE(review): 3 presumably encodes a 16 pel extent as (16 / 4) - 1 - confirm */
    ps_proc->ps_pu->b4_wd = 3;
    ps_proc->ps_pu->b4_ht = 3;

    /* Update min sad conditions */
    if (ps_me_ctxt->u4_min_sad_reached == 1)
    {
        ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
        ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
    }

}
1088
1089/**
1090*******************************************************************************
1091*
1092* @brief This function performs motion estimation for the current NMB
1093*
1094* @par Description:
1095* Intializes input and output pointers required by the function ih264e_compute_me
1096* and calls the function ih264e_compute_me in a loop to process NMBs.
1097*
1098* @param[in] ps_proc
1099*  Process context corresponding to the job
1100*
1101* @returns
1102*
1103* @remarks none
1104*
1105*******************************************************************************
1106*/
1107void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
1108{
1109    /* pic pu */
1110    enc_pu_t *ps_pu_begin = ps_proc->ps_pu;
1111
1112    /* ME map */
1113    UWORD8 *pu1_me_map = ps_proc->pu1_me_map + (ps_proc->i4_mb_y * ps_proc->i4_wd_mbs);
1114
1115    /* temp var */
1116    UWORD32 u4_i;
1117
1118    ps_proc->s_me_ctxt.u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
1119    ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->s_left_mb_syntax_ele.u2_mb_type == PSKIP);
1120
1121    for (u4_i = 0; u4_i < u4_nmb_count; u4_i++)
1122    {
1123        /* Wait for ME map */
1124        if (ps_proc->i4_mb_y > 0)
1125        {
1126            /* Wait for top right ME to be done */
1127            UWORD8 *pu1_me_map_tp_rw = ps_proc->pu1_me_map + (ps_proc->i4_mb_y - 1) * ps_proc->i4_wd_mbs;
1128
1129            while (1)
1130            {
1131                volatile UWORD8 *pu1_buf;
1132                WORD32 idx = ps_proc->i4_mb_x + u4_i + 1;
1133
1134                idx = MIN(idx, (ps_proc->i4_wd_mbs - 1));
1135                pu1_buf =  pu1_me_map_tp_rw + idx;
1136                if(*pu1_buf)
1137                    break;
1138                ithread_yield();
1139            }
1140        }
1141
1142        ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].as_skip_mv[0]);
1143        ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl);
1144        ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].as_pred_mv[0]);
1145
1146        ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]);
1147
1148        ps_proc->ps_cur_mb->u4_min_sad = ps_proc->u4_min_sad;
1149        ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1150
1151        ps_proc->ps_cur_mb->i4_mb_cost = INT_MAX;
1152        ps_proc->ps_cur_mb->i4_mb_distortion = SHRT_MAX;
1153
1154        /* Set the best subpel buf to the correct mb so that the buffer can be copied */
1155        ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_i].pu1_best_sub_pel_buf;
1156        ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_i].u4_bst_spel_buf_strd;
1157
1158        /* Set the min sad conditions */
1159        ps_proc->ps_cur_mb->u4_min_sad = ps_proc->ps_codec->u4_min_sad;
1160        ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1161
1162        /* Derive neighbor availability for the current macroblock */
1163        ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
1164
1165        /* init me */
1166        ih264e_init_me(ps_proc);
1167
1168        /* Compute ME according to slice type */
1169        ps_proc->ps_codec->apf_compute_me[ps_proc->i4_slice_type](ps_proc);
1170
1171        /* update top and left structs */
1172        {
1173            mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1174            mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME);
1175            enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
1176            enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
1177            enc_pu_t *ps_top_mv = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
1178
1179            *ps_top_left_syn = *ps_top_syn;
1180
1181            *ps_top_left_mb_pu = *ps_top_mv;
1182            *ps_left_mb_pu = *ps_proc->ps_pu;
1183        }
1184
1185        ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1186
1187        /* Copy the min sad reached info */
1188        ps_proc->ps_nmb_info[u4_i].u4_min_sad_reached = ps_proc->ps_cur_mb->u4_min_sad_reached;
1189        ps_proc->ps_nmb_info[u4_i].u4_min_sad   = ps_proc->ps_cur_mb->u4_min_sad;
1190
1191        /*
1192         * To make sure that the MV map is properly sync to the
1193         * cache we need to do a DDB
1194         */
1195        {
1196            DATA_SYNC();
1197
1198            pu1_me_map[ps_proc->i4_mb_x] = 1;
1199        }
1200        ps_proc->i4_mb_x++;
1201
1202        ps_proc->s_me_ctxt.u4_left_is_intra = 0;
1203        ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->ps_cur_mb->u4_mb_type  == PSKIP);
1204
1205        /* update buffers pointers */
1206        ps_proc->pu1_src_buf_luma += MB_SIZE;
1207        ps_proc->pu1_rec_buf_luma += MB_SIZE;
1208        ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1209        ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1210
1211        /*
1212         * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1213         * the stride per MB is MB_SIZE
1214         */
1215        ps_proc->pu1_src_buf_chroma += MB_SIZE;
1216        ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1217        ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1218        ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1219
1220
1221        ps_proc->pu4_mb_pu_cnt += 1;
1222    }
1223
1224
1225    ps_proc->ps_pu = ps_pu_begin;
1226    ps_proc->i4_mb_x = ps_proc->i4_mb_x - u4_nmb_count;
1227
1228    /* update buffers pointers */
1229    ps_proc->pu1_src_buf_luma -= MB_SIZE * u4_nmb_count;
1230    ps_proc->pu1_rec_buf_luma -= MB_SIZE * u4_nmb_count;
1231    ps_proc->apu1_ref_buf_luma[0] -= MB_SIZE * u4_nmb_count;
1232    ps_proc->apu1_ref_buf_luma[1] -= MB_SIZE * u4_nmb_count;
1233
1234    /*
1235     * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1236     * the stride per MB is MB_SIZE
1237     */
1238    ps_proc->pu1_src_buf_chroma -= MB_SIZE * u4_nmb_count;
1239    ps_proc->pu1_rec_buf_chroma -= MB_SIZE * u4_nmb_count;
1240    ps_proc->apu1_ref_buf_chroma[0] -= MB_SIZE * u4_nmb_count;
1241    ps_proc->apu1_ref_buf_chroma[1] -= MB_SIZE * u4_nmb_count;
1242
1243
1244    ps_proc->pu4_mb_pu_cnt -= u4_nmb_count;
1245}
1246
1247
1248/**
1249*******************************************************************************
1250*
1251* @brief The function computes parameters for a BSKIP MB
1252*
1253* @par Description:
1254*  The function updates the skip motion vector for B Mb, check if the Mb can be
1255*  marked as skip and returns it
1256*
1257* @param[in] ps_proc
1258*  Pointer to process context
1259*
1260* @param[in] u4_for_me
1261*  Dummy
1262*
1263* @param[in] i4_reflist
1264*  Dummy
1265*
1266* @returns Flag indicating if the current Mb can be skip or not
1267*
1268* @remarks
1269*   The code implements the logic as described in sec 8.4.1.2.2
1270*   It also computes co-located MB parmas according to sec 8.4.1.2.1
1271*
1272*   Need to add condition for this fucntion to be used in ME
1273*
1274*******************************************************************************/
1275WORD32 ih264e_find_bskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
1276{
1277    /* Colzero for co-located MB */
1278    WORD32 i4_colzeroflag;
1279
1280    /* motion vectors for neighbouring MBs */
1281    enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
1282
1283    /* Variables to check if a particular mB is available */
1284    WORD32 i4_a, i4_b, i4_c, i4_c_avail;
1285
1286    /* Mode availability, init to no modes available     */
1287    WORD32 i4_mode_avail;
1288
1289    /*  mb neighbor availability */
1290    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1291
1292    /* Temp var */
1293    WORD32 i, i4_cmpl_mode, i4_skip_type = -1;
1294
1295    /*
1296     * Colocated motion vector
1297     */
1298    mv_t s_mvcol;
1299
1300    /*
1301     * Colocated picture idx
1302     */
1303    WORD32 i4_refidxcol;
1304
1305    UNUSED(i4_reflist);
1306
1307    /**************************************************************************
1308     *Find co-located MB parameters
1309     *      See sec 8.4.1.2.1  for reference
1310     **************************************************************************/
1311    {
1312        /*
1313         * Find the co-located Mb and update the skip and pred appropriately
1314         * 1) Default colpic is forward ref : Table 8-6
1315         * 2) Default mb col is current MB : Table 8-8
1316         */
1317
1318        if (ps_proc->ps_colpu->b1_intra_flag)
1319        {
1320            s_mvcol.i2_mvx = 0;
1321            s_mvcol.i2_mvy = 0;
1322            i4_refidxcol = -1;
1323        }
1324        else
1325        {
1326            if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1)
1327            {
1328                s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
1329                i4_refidxcol = 0;
1330            }
1331            else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0)
1332            {
1333                s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
1334                i4_refidxcol = 0;
1335            }
1336        }
1337
1338        /* RefPicList1[ 0 ]  is marked as  "used for short-term reference", as default */
1339        i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1)
1340                        && (ABS(s_mvcol.i2_mvy) <= 1));
1341
1342    }
1343
1344    /***************************************************************************
1345     * Evaluating skip params : Spatial Skip
1346     **************************************************************************/
1347    {
1348    /* Get the neighbouring MBS according to Section 8.4.1.2.2 */
1349    ps_a_pu = &ps_proc->s_left_mb_pu_ME;
1350    ps_b_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);
1351
1352    i4_c_avail = 0;
1353    if (ps_ngbr_avbl->u1_mb_c)
1354    {
1355        ps_c_pu = &((ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x)[1]);
1356        i4_c_avail = 1;
1357    }
1358    else
1359    {
1360        ps_c_pu = &ps_proc->s_top_left_mb_pu_ME;
1361        i4_c_avail = ps_ngbr_avbl->u1_mb_d;
1362    }
1363
1364    i4_a = ps_ngbr_avbl->u1_mb_a;
1365    i4_b = ps_ngbr_avbl->u1_mb_b;
1366    i4_c = i4_c_avail;
1367
1368    /* Init to no mode avail */
1369    i4_mode_avail = 0;
1370    for (i = 0; i < 2; i++)
1371    {
1372        i4_cmpl_mode = (i == 0) ? PRED_L1 : PRED_L0;
1373
1374        i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1375        i4_mode_avail |= (i4_b && (ps_b_pu->b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1376        i4_mode_avail |= (i4_c && (ps_c_pu->b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1377    }
1378
1379    if (i4_mode_avail == 0x3 || i4_mode_avail == 0x0)
1380    {
1381        i4_skip_type= PRED_BI;
1382    }
1383    else if(i4_mode_avail == 0x1)
1384    {
1385        i4_skip_type = PRED_L0;
1386    }
1387    else if(i4_mode_avail == 0x2)
1388    {
1389        i4_skip_type = PRED_L1;
1390    }
1391
1392    /* Update skip MV for L0 */
1393    if ((i4_mode_avail & 0x1) && (!i4_colzeroflag))
1394    {
1395        ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
1396        ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
1397    }
1398    else
1399    {
1400        ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
1401        ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
1402    }
1403
1404    /* Update skip MV for L1 */
1405    if ((i4_mode_avail & 0x2) && (!i4_colzeroflag))
1406    {
1407        ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
1408        ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
1409    }
1410    else
1411    {
1412        ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
1413        ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
1414    }
1415
1416    }
1417
1418    /***************************************************************************
1419     * Evaluating skip params : Temporal skip
1420     **************************************************************************/
1421    {
1422        pic_buf_t *  ps_ref_pic[MAX_REF_PIC_CNT];
1423        WORD32 i4_td, i4_tx, i4_tb, i4_dist_scale_factor;
1424        enc_pu_mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[2];
1425
1426        ps_ref_pic[PRED_L0] = ps_proc->aps_ref_pic[PRED_L0];
1427        ps_ref_pic[PRED_L1] = ps_proc->aps_ref_pic[PRED_L1];
1428
1429        i4_tb = ps_proc->ps_codec->i4_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;
1430        i4_td = ps_ref_pic[PRED_L1]->i4_abs_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;
1431
1432        i4_tb = CLIP3(-128, 127, i4_tb);
1433        i4_td = CLIP3(-128, 127, i4_td);
1434
1435        i4_tx = ( 16384 + ABS( i4_td / 2 ) ) / i4_td ;
1436        i4_dist_scale_factor =  CLIP3( -1024, 1023, ( i4_tb * i4_tx + 32 ) >> 6 );
1437
1438        /* Motion vectors taken in full pel resolution , hence  -> (& 0xfffc) operation */
1439        ps_skip_mv[PRED_L0].s_mv.i2_mvx = (( i4_dist_scale_factor * s_mvcol.i2_mvx + 128 ) >> 8) & 0xfffc;
1440        ps_skip_mv[PRED_L0].s_mv.i2_mvy = (( i4_dist_scale_factor * s_mvcol.i2_mvy + 128 ) >> 8) & 0xfffc;
1441
1442        ps_skip_mv[PRED_L1].s_mv.i2_mvx = (ps_skip_mv[PRED_L0].s_mv.i2_mvx - s_mvcol.i2_mvx) & 0xfffc;
1443        ps_skip_mv[PRED_L1].s_mv.i2_mvy = (ps_skip_mv[PRED_L0].s_mv.i2_mvy - s_mvcol.i2_mvy) & 0xfffc;
1444
1445    }
1446
1447    return i4_skip_type;
1448}
1449
1450/**
1451*******************************************************************************
1452*
1453* @brief The function computes the skip motion vectoe for B mb
1454*
1455* @par Description:
1456*  The function gives the skip motion vector for B Mb, check if the Mb can be
1457*  marked as skip
1458*
1459* @param[in] ps_proc
1460*  Pointer to process context
1461*
1462* @param[in] u4_for_me
1463*  Dummy
1464*
1465* @param[in] u4_for_me
1466*  Dummy
1467*
1468* @returns Flag indicating if the current Mb can be skip or not
1469*
1470* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
1471*   specification. It also computes co-located MB parmas according to sec 8.4.1.2.1
1472*
1473*******************************************************************************/
1474WORD32 ih264e_find_bskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
1475{
1476    WORD32 i4_colzeroflag;
1477
1478    /* motion vectors */
1479    enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
1480
1481    /* Syntax elem */
1482    mb_info_t *ps_a_syn, *ps_b_syn, *ps_c_syn;
1483
1484    /* Variables to check if a particular mB is available */
1485    WORD32 i4_a, i4_b, i4_c, i4_c_avail;
1486
1487    /* Mode availability, init to no modes available     */
1488    WORD32 i4_mode_avail;
1489
1490    /*  mb neighbor availability */
1491    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1492
1493    /* Temp var */
1494    WORD32 i, i4_cmpl_mode;
1495
1496    UNUSED(i4_reflist);
1497
1498    /**************************************************************************
1499     *Find co-locates parameters
1500     *      See sec 8.4.1.2.1  for reference
1501     **************************************************************************/
1502    {
1503        /*
1504         * Find the co-located Mb and update the skip and pred appropriately
1505         * 1) Default colpic is forward ref : Table 8-6
1506         * 2) Default mb col is current MB : Table 8-8
1507         */
1508
1509        mv_t s_mvcol;
1510        WORD32 i4_refidxcol;
1511
1512        if (ps_proc->ps_colpu->b1_intra_flag)
1513        {
1514            s_mvcol.i2_mvx = 0;
1515            s_mvcol.i2_mvy = 0;
1516            i4_refidxcol = -1;
1517        }
1518        else
1519        {
1520            if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1)
1521            {
1522                s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
1523                i4_refidxcol = 0;
1524            }
1525            else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0)
1526            {
1527                s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
1528                i4_refidxcol = 0;
1529            }
1530        }
1531
1532        /* RefPicList1[ 0 ]  is marked as  "used for short-term reference", as default */
1533        i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1)
1534                        && (ABS(s_mvcol.i2_mvy) <= 1));
1535
1536    }
1537
1538    /***************************************************************************
1539     * Evaluating skip params
1540     **************************************************************************/
1541    /* Section 8.4.1.2.2 */
1542    ps_a_syn = &ps_proc->s_left_mb_syntax_ele;
1543    ps_a_pu = &ps_proc->s_left_mb_pu;
1544
1545    ps_b_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1546    ps_b_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
1547
1548    i4_c_avail = 0;
1549    if (ps_ngbr_avbl->u1_mb_c)
1550    {
1551        ps_c_syn = &((ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x)[1]);
1552        ps_c_pu = &((ps_proc->ps_top_row_pu + ps_proc->i4_mb_x)[1]);
1553        i4_c_avail = 1;
1554    }
1555    else
1556    {
1557        ps_c_syn = &(ps_proc->s_top_left_mb_syntax_ele);
1558        ps_c_pu = &ps_proc->s_top_left_mb_pu;
1559        i4_c_avail = ps_ngbr_avbl->u1_mb_d;
1560    }
1561
1562
1563    i4_a = ps_ngbr_avbl->u1_mb_a;
1564    i4_a &= !ps_a_syn->u2_is_intra;
1565
1566    i4_b = ps_ngbr_avbl->u1_mb_b;
1567    i4_b &= !ps_b_syn->u2_is_intra;
1568
1569    i4_c = i4_c_avail;
1570    i4_c &= !ps_c_syn->u2_is_intra;
1571
1572    /* Init to no mode avail */
1573    i4_mode_avail = 0;
1574    for (i = 0; i < 2; i++)
1575    {
1576        i4_cmpl_mode = (i == 0) ? PRED_L1 : PRED_L0;
1577
1578        i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1579        i4_mode_avail |= (i4_b && (ps_b_pu->b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1580        i4_mode_avail |= (i4_c && (ps_c_pu->b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1581    }
1582
1583    /* Update skip MV for L0 */
1584    if ((i4_mode_avail & 0x1) && (!i4_colzeroflag))
1585    {
1586        ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
1587        ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
1588    }
1589    else
1590    {
1591        ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
1592        ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
1593    }
1594
1595    /* Update skip MV for L1 */
1596    if ((i4_mode_avail & 0x2) && (!i4_colzeroflag))
1597    {
1598        ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
1599        ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
1600    }
1601    else
1602    {
1603        ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
1604        ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
1605    }
1606
1607    /* Now see if the ME information matches the SKIP information */
1608    switch (ps_proc->ps_pu->b2_pred_mode)
1609    {
1610        case PRED_BI:
1611            if (  (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx)
1612               && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy)
1613               && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx)
1614               && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy)
1615               && (i4_mode_avail ==  0x3 || i4_mode_avail == 0x0))
1616            {
1617                return 1;
1618            }
1619            break;
1620
1621        case PRED_L0:
1622            if ( (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx)
1623              && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy)
1624              && (i4_mode_avail == 0x1))
1625            {
1626                return 1;
1627            }
1628            break;
1629
1630        case PRED_L1:
1631            if (  (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx)
1632               && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy)
1633               && (i4_mode_avail == 0x2))
1634            {
1635                return 1;
1636            }
1637            break;
1638    }
1639
1640    return 0;
1641}
1642
1643
1644/**
1645*******************************************************************************
1646*
1647* @brief This function computes the best motion vector among the tentative mv
1648* candidates chosen.
1649*
1650* @par Description:
1651*  This function determines the position in the search window at which the motion
1652*  estimation should begin in order to minimise the number of search iterations.
1653*
1654* @param[in] ps_mb_part
1655*  pointer to current mb partition ctxt with respect to ME
1656*
1657* @param[in] u4_lambda_motion
1658*  lambda motion
1659*
1660* @param[in] u4_fast_flag
1661*  enable/disable fast sad computation
1662*
1663* @returns  mv pair & corresponding distortion and cost
1664*
* @remarks Currently only 4 search candidates are supported
1666*
1667*******************************************************************************
1668*/
1669void ih264e_evaluate_bipred(me_ctxt_t *ps_me_ctxt,
1670                            process_ctxt_t *ps_proc,
1671                            mb_part_ctxt *ps_mb_ctxt_bi)
1672{
1673
1674    UWORD32 i, u4_fast_sad;
1675
1676    WORD32 i4_dest_buff;
1677
1678    mv_t *ps_l0_pred_mv, *ps_l1_pred_mv, s_l0_mv, s_l1_mv;
1679
1680    UWORD8 *pu1_ref_mb_l0, *pu1_ref_mb_l1;
1681
1682    UWORD8 *pu1_dst_buf;
1683
1684    WORD32 i4_ref_l0_stride, i4_ref_l1_stride;
1685
1686    WORD32 i4_mb_distortion, i4_mb_cost;
1687
1688    u4_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
1689
1690    i4_dest_buff = 0;
1691    for (i = 0; i < ps_me_ctxt->u4_num_candidates[PRED_BI]; i += 2)
1692    {
1693        pu1_dst_buf = ps_me_ctxt->apu1_subpel_buffs[i4_dest_buff];
1694
1695        s_l0_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx >> 2;
1696        s_l0_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy >> 2;
1697        s_l1_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx >> 2;
1698        s_l1_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy >> 2;
1699
1700        ps_l0_pred_mv = &ps_proc->ps_pred_mv[PRED_L0].s_mv;
1701        ps_l1_pred_mv = &ps_proc->ps_pred_mv[PRED_L1].s_mv;
1702
1703        if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx & 0x3)||
1704                        (ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy & 0x3))
1705        {
1706            pu1_ref_mb_l0 = ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf;
1707            i4_ref_l0_stride = ps_me_ctxt->u4_subpel_buf_strd;
1708        }
1709        else
1710        {
1711            pu1_ref_mb_l0 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L0] + (s_l0_mv.i2_mvx) + ((s_l0_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
1712            i4_ref_l0_stride = ps_me_ctxt->i4_rec_strd;
1713        }
1714
1715
1716        if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx & 0x3) ||
1717                        (ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy & 0x3))
1718        {
1719            pu1_ref_mb_l1 = ps_me_ctxt->as_mb_part[PRED_L1].pu1_best_hpel_buf;
1720            i4_ref_l1_stride = ps_me_ctxt->u4_subpel_buf_strd;
1721        }
1722        else
1723        {
1724            pu1_ref_mb_l1 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L1] + (s_l1_mv.i2_mvx) + ((s_l1_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
1725            i4_ref_l1_stride = ps_me_ctxt->i4_rec_strd;
1726        }
1727
1728        ps_proc->ps_codec->pf_inter_pred_luma_bilinear(
1729                        pu1_ref_mb_l0, pu1_ref_mb_l1, pu1_dst_buf,
1730                        i4_ref_l0_stride, i4_ref_l1_stride,
1731                        ps_me_ctxt->u4_subpel_buf_strd, MB_SIZE, MB_SIZE);
1732
1733        ps_me_ctxt->pf_ime_compute_sad_16x16[u4_fast_sad](
1734                        ps_me_ctxt->pu1_src_buf_luma, pu1_dst_buf,
1735                        ps_me_ctxt->i4_src_strd, ps_me_ctxt->u4_subpel_buf_strd,
1736                        INT_MAX, &i4_mb_distortion);
1737
1738        /* compute cost */
1739        i4_mb_cost =  ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx - ps_l0_pred_mv->i2_mvx];
1740        i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy - ps_l0_pred_mv->i2_mvy];
1741        i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx - ps_l1_pred_mv->i2_mvx];
1742        i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy - ps_l1_pred_mv->i2_mvy];
1743
1744        i4_mb_cost -= (ps_me_ctxt->i4_skip_bias[BSLICE]) * (ps_me_ctxt->i4_skip_type == PRED_BI) * (i == 0);
1745
1746
1747        i4_mb_cost *= ps_me_ctxt->u4_lambda_motion;
1748        i4_mb_cost += i4_mb_distortion;
1749
1750        if (i4_mb_cost < ps_mb_ctxt_bi->i4_mb_cost)
1751        {
1752            ps_mb_ctxt_bi->i4_srch_pos_idx = (i>>1);
1753            ps_mb_ctxt_bi->i4_mb_cost = i4_mb_cost;
1754            ps_mb_ctxt_bi->i4_mb_distortion = i4_mb_distortion;
1755            ps_mb_ctxt_bi->pu1_best_hpel_buf = pu1_dst_buf;
1756            i4_dest_buff = (i4_dest_buff + 1) % 2;
1757        }
1758    }
1759
1760}
1761
1762/**
1763*******************************************************************************
1764*
1765* @brief This function performs motion estimation for the current mb
1766*
1767* @par Description:
1768*  The current mb is compared with a list of mb's in the reference frame for
1769*  least cost. The mb that offers least cost is chosen as predicted mb and the
1770*  displacement of the predicted mb from index location of the current mb is
1771*  signaled as mv. The list of the mb's that are chosen in the reference frame
1772*  are dependent on the speed of the ME configured.
1773*
1774* @param[in] ps_proc
1775*  Process context corresponding to the job
1776*
1777* @returns  motion vector of the pred mb, sad, cost.
1778*
1779* @remarks none
1780*
1781*******************************************************************************
1782*/
1783void ih264e_compute_me_multi_reflist(process_ctxt_t *ps_proc)
1784{
1785    /* me ctxt */
1786    me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
1787
1788    /* codec context */
1789    codec_t *ps_codec = ps_proc->ps_codec;
1790
1791    /* Temp variables for looping over ref lists */
1792    WORD32 i4_reflist, i4_max_reflist;
1793
1794    /* recon stride */
1795    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1796
1797    /* source buffer for halp pel generation functions */
1798    UWORD8 *pu1_hpel_src;
1799
1800    /* quantization parameters */
1801    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1802
1803    /* Mb part ctxts for SKIP */
1804    mb_part_ctxt as_skip_mbpart[2];
1805
1806    /* Sad therholds */
1807    ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
1808
1809    {
1810        WORD32 rows_above, rows_below, columns_left, columns_right;
1811
1812        /* During evaluation for motion vectors do not search through padded regions */
1813        /* Obtain number of rows and columns that are effective for computing for me evaluation */
1814        rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
1815        rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
1816        columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
1817        columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
1818
1819        /* init srch range */
1820        /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
1821         * on all sides.
1822         */
1823        ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1824        ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1825        ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1826        ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1827
1828        /* this is to facilitate fast sub pel computation with minimal loads */
1829        if (ps_me_ctxt->u4_enable_hpel)
1830        {
1831            ps_me_ctxt->i4_srch_range_w += 1;
1832            ps_me_ctxt->i4_srch_range_e -= 1;
1833            ps_me_ctxt->i4_srch_range_n += 1;
1834            ps_me_ctxt->i4_srch_range_s -= 1;
1835        }
1836    }
1837
1838    /* Compute ME and store the MVs */
1839    {
1840        /***********************************************************************
1841         * Compute ME for lists L0 and L1
1842         *  For L0 -> L0 skip + L0
1843         *  for L1 -> L0 skip + L0 + L1 skip + L1
1844         ***********************************************************************/
1845        i4_max_reflist = (ps_proc->i4_slice_type == PSLICE) ? PRED_L0 : PRED_L1;
1846
1847        /* Init SATQD for the current list */
1848        ps_me_ctxt->u4_min_sad_reached  = 0;
1849        ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
1850
1851        for (i4_reflist = PRED_L0; i4_reflist <= i4_max_reflist; i4_reflist++)
1852        {
1853
1854            /* Get the seed motion vector candidates                    */
1855            ih264e_get_search_candidates(ps_proc, ps_me_ctxt, i4_reflist);
1856
1857            /* ****************************************************************
1858             *Evaluate the SKIP for current list
1859             * ****************************************************************/
1860            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx = 0;
1861            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy = 0;
1862            as_skip_mbpart[i4_reflist].i4_mb_cost = INT_MAX;
1863            as_skip_mbpart[i4_reflist].i4_mb_distortion = INT_MAX;
1864
1865            if (ps_me_ctxt->i4_skip_type == i4_reflist)
1866            {
1867                ime_compute_skip_cost( ps_me_ctxt,
1868                                       (ime_mv_t *)(&ps_proc->ps_skip_mv[i4_reflist].s_mv),
1869                                       &as_skip_mbpart[i4_reflist],
1870                                       ps_proc->ps_codec->s_cfg.u4_enable_satqd,
1871                                       i4_reflist,
1872                                       (ps_proc->i4_slice_type == BSLICE) );
1873            }
1874
1875            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx <<= 2;
1876            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy <<= 2;
1877
1878            /******************************************************************
1879             * Evaluate ME For current list
1880             *****************************************************************/
1881            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx = 0;
1882            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy = 0;
1883            ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = INT_MAX;
1884            ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = INT_MAX;
1885
1886            /* Init Hpel */
1887            ps_me_ctxt->as_mb_part[i4_reflist].pu1_best_hpel_buf = NULL;
1888
1889            /* In case we found out the minimum SAD, exit the ME eval */
1890            if (ps_me_ctxt->u4_min_sad_reached)
1891            {
1892                i4_max_reflist = i4_reflist;
1893                break;
1894            }
1895
1896
1897            /* Evaluate search candidates for initial mv pt */
1898            ime_evaluate_init_srchposn_16x16(ps_me_ctxt, i4_reflist);
1899
1900            /********************************************************************/
1901            /*                  full pel motion estimation                      */
1902            /********************************************************************/
1903            ime_full_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
1904
1905            DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx >> 2),
1906                                   (ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy >> 2));
1907
1908            DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 1);
1909
1910            /* Scale the MV to qpel resolution */
1911            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx <<= 2;
1912            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy <<= 2;
1913
1914            if (ps_me_ctxt->u4_enable_hpel)
1915            {
1916                /* moving src pointer to the converged motion vector location */
1917                pu1_hpel_src =   ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]
1918                               + (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2)
1919                               + ((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2)* i4_rec_strd);
1920
1921                ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
1922                ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
1923                ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
1924
1925                /* Init the search position to an invalid number */
1926                ps_me_ctxt->as_mb_part[i4_reflist].i4_srch_pos_idx = 3;
1927
1928                /* Incase a buffer is still in use by L0, replace it with spare buff */
1929                ps_me_ctxt->apu1_subpel_buffs[ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx] =
1930                                ps_proc->apu1_subpel_buffs[3];
1931
1932
1933                ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
1934
1935                /* half  pel search is done for both sides of full pel,
1936                 * hence half_x of width x height = 17x16 is created
1937                 * starting from left half_x of converged full pel */
1938                pu1_hpel_src -= 1;
1939
1940                /* computing half_x */
1941                ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
1942                                                      ps_me_ctxt->apu1_subpel_buffs[0],
1943                                                      i4_rec_strd,
1944                                                      ps_me_ctxt->u4_subpel_buf_strd);
1945
1946                /*
1947                 * Halfpel search is done for both sides of full pel,
1948                 * hence half_y of width x height = 16x17 is created
1949                 * starting from top half_y of converged full pel
1950                 * for half_xy top_left is required
1951                 * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
1952                 */
1953                pu1_hpel_src -= i4_rec_strd;
1954
1955                /* computing half_y and half_xy */
1956                ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
1957                                pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
1958                                ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
1959                                ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
1960                                ps_me_ctxt->u4_subpel_buf_strd);
1961
1962                ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
1963
1964            }
1965        }
1966
1967        /***********************************************************************
1968         * If a particular skiip Mv is giving better sad, copy to the corresponding
1969         * MBPART
1970         * In B slices this loop should go only to PREDL1: If we found min sad
1971         * we will go to the skip ref list only
1972         * Have to find a way to make it without too much change or new vars
1973         **********************************************************************/
1974        for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
1975        {
1976            if (as_skip_mbpart[i4_reflist].i4_mb_cost < ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost)
1977            {
1978                ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = as_skip_mbpart[i4_reflist].i4_mb_cost;
1979                ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = as_skip_mbpart[i4_reflist].i4_mb_distortion;
1980                ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr = as_skip_mbpart[i4_reflist].s_mv_curr;
1981            }
1982        }
1983
1984        /***********************************************************************
1985         * Compute ME for BI
1986         *  In case of BI we do ME for two candidates
1987         *   1) The best L0 and L1 Mvs
1988         *   2) Skip L0 and L1 MVs
1989         *
1990         *   TODO
1991         *   one of the search candidates is skip. Hence it may be duplicated
1992         ***********************************************************************/
1993        if (i4_max_reflist == PRED_L1 && ps_me_ctxt->u4_min_sad_reached == 0)
1994        {
1995            WORD32 i, j = 0;
1996            WORD32 l0_srch_pos_idx, l1_srch_pos_idx;
1997            WORD32 i4_l0_skip_mv_idx, i4_l1_skip_mv_idx;
1998
1999            /* Get the free buffers */
2000            l0_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx;
2001            l1_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L1].i4_srch_pos_idx;
2002
2003            /* Search for the two free buffers in subpel list */
2004            for (i = 0; i < SUBPEL_BUFF_CNT; i++)
2005            {
2006                if (i != l0_srch_pos_idx && i != l1_srch_pos_idx)
2007                {
2008                    ps_me_ctxt->apu1_subpel_buffs[j] = ps_proc->apu1_subpel_buffs[i];
2009                    j++;
2010                }
2011            }
2012            ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
2013
2014            /* Copy the statial SKIP MV of each list */
2015            i4_l0_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L0] - 2;
2016            i4_l1_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L1] - 2;
2017            ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
2018            ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
2019            ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
2020            ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2021
2022            /* Copy the SKIP MV temporal of each list */
2023            i4_l0_skip_mv_idx++;
2024            i4_l1_skip_mv_idx++;
2025            ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
2026            ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
2027            ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
2028            ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2029
2030            /* Copy the best MV after ME */
2031            ps_me_ctxt->as_mv_init_search[PRED_BI][4] = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr;
2032            ps_me_ctxt->as_mv_init_search[PRED_BI][5] = ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr;
2033
2034            ps_me_ctxt->u4_num_candidates[PRED_BI] = 6;
2035
2036            ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_cost = INT_MAX;
2037            ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_distortion = INT_MAX;
2038
2039            ih264e_evaluate_bipred(ps_me_ctxt, ps_proc,
2040                                   &ps_me_ctxt->as_mb_part[PRED_BI]);
2041
2042            i4_max_reflist = PRED_BI;
2043        }
2044
2045        /**********************************************************************
2046         * Now get the minimum of MB part sads by searching over all ref lists
2047         **********************************************************************/
2048        ps_proc->ps_pu->b2_pred_mode = 0x3;
2049
2050        for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
2051        {
2052            if (ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
2053            {
2054                ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost;
2055                ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion;
2056                ps_proc->ps_cur_mb->u4_mb_type = (ps_proc->i4_slice_type == PSLICE) ? P16x16 : B16x16;
2057                ps_proc->ps_pu->b2_pred_mode = i4_reflist ;
2058            }
2059        }
2060
2061        /**********************************************************************
2062         * In case we have a BI MB, we have to copy the buffers and set proer MV's
2063         *  1)In case its BI, we need to get the best MVs given by BI and update
2064         *    to their corresponding MB part
2065         *  2)We also need to copy the buffer in which bipred buff is populated
2066         *
2067         *  Not that if we have
2068         **********************************************************************/
2069        if (ps_proc->ps_pu->b2_pred_mode == PRED_BI)
2070        {
2071            WORD32 i4_srch_pos = ps_me_ctxt->as_mb_part[PRED_BI].i4_srch_pos_idx;
2072            UWORD8 *pu1_bi_buf = ps_me_ctxt->as_mb_part[PRED_BI].pu1_best_hpel_buf;
2073
2074            ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][i4_srch_pos << 1];
2075            ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][(i4_srch_pos << 1) + 1];
2076
2077            /* Now we have to copy the buffers */
2078            ps_codec->pf_inter_pred_luma_copy(pu1_bi_buf,
2079                                              ps_proc->pu1_best_subpel_buf,
2080                                              ps_me_ctxt->u4_subpel_buf_strd,
2081                                              ps_proc->u4_bst_spel_buf_strd,
2082                                              MB_SIZE, MB_SIZE, NULL, 0);
2083
2084        }
2085        else if (ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf)
2086        {
2087            /* Now we have to copy the buffers */
2088            ps_codec->pf_inter_pred_luma_copy(
2089                            ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf,
2090                            ps_proc->pu1_best_subpel_buf,
2091                            ps_me_ctxt->u4_subpel_buf_strd,
2092                            ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
2093                            NULL, 0);
2094        }
2095    }
2096
2097    /**************************************************************************
2098     *Now copy the MVs to the current PU with qpel scaling
2099     ***************************************************************************/
2100    ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx);
2101    ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy);
2102    ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvx);
2103    ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvy);
2104
2105
2106    ps_proc->ps_pu->s_me_info[0].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L1)? -1:0;
2107    ps_proc->ps_pu->s_me_info[1].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L0)? -1:0;
2108
2109    /* number of partitions */
2110    ps_proc->u4_num_sub_partitions = 1;
2111    *(ps_proc->pu4_mb_pu_cnt) = 1;
2112
2113    /* position in-terms of PU */
2114    ps_proc->ps_pu->b4_pos_x = 0;
2115    ps_proc->ps_pu->b4_pos_y = 0;
2116
2117    /* PU size */
2118    ps_proc->ps_pu->b4_wd = 3;
2119    ps_proc->ps_pu->b4_ht = 3;
2120
2121    /* Update min sad conditions */
2122    if (ps_me_ctxt->u4_min_sad_reached == 1)
2123    {
2124        ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
2125        ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
2126    }
2127}
2128
2129